Update v4 format version from 402 to 403.

Without personalization:
Total words: 1134774, Success Num: 899230, Success Percentage: 79.243%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1871, Bad Failure Percentage: 0.165%
Failures, with auto-correction (F-C): 29084, F-C Percentage: 2.563%
Max Keystrokes: 6072959, Min Keystrokes: 4436090, Keystroke Saving Percentage:26.953%

Before:
Total words: 1134646, Success Num: 925194, Success Percentage: 81.540%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1316, Bad Failure Percentage: 0.116%
Failures, with auto-correction (F-C): 28288, F-C Percentage: 2.493%
Max Keystrokes: 6072831, Min Keystrokes: 3946188, Keystroke Saving Percentage:35.019%

After:
Total words: 1134659, Success Num: 944746, Success Percentage: 83.263%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1258, Bad Failure Percentage: 0.111%
Failures, with auto-correction (F-C): 28016, F-C Percentage: 2.469%
Max Keystrokes: 6072844, Min Keystrokes: 3387333, Keystroke Saving Percentage:44.222%

Change-Id: I3af42ec37a11847c0429c28616e726f6a339247f
main
Keisuke Kuroyanagi 2014-10-30 12:54:06 +09:00
parent c611989929
commit ea468cc9de
11 changed files with 73 additions and 112 deletions

View File

@ -120,7 +120,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
// Reports whether a dictionary in the given format version has to be migrated.
// Returns true only when formatVersion equals one of the older versions listed below.
private static boolean needsToMigrateDictionary(final int formatVersion) {
// When we bump up the dictionary format version, the old version should be added here
// to support migration. Note that native code has to support reading such formats.
// NOTE(review): this text is diff output without +/- markers. The hunk header
// (-120,7 +120,8) implies the one-line return is the removed form and the two-line
// return (which also accepts VERSION402) is the added form; only one is live.
return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING;
return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|| formatVersion == FormatSpec.VERSION402;
}
public boolean isValidDictionaryLocked() {

View File

@ -175,9 +175,10 @@ public final class FormatSpec {
public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
// Dictionary version used for testing.
public static final int VERSION4_ONLY_FOR_TESTING = 399;
public static final int VERSION401 = 401;
public static final int VERSION4 = 402;
public static final int VERSION4_DEV = 403;
public static final int VERSION402 = 402;
public static final int VERSION403 = 403;
public static final int VERSION4 = VERSION403;
public static final int VERSION4_DEV = VERSION403;
static final int MINIMUM_SUPPORTED_STATIC_VERSION = VERSION202;
static final int MAXIMUM_SUPPORTED_STATIC_VERSION = VERSION202;
static final int MINIMUM_SUPPORTED_DYNAMIC_VERSION = VERSION4;

View File

@ -141,10 +141,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return FormatUtils::VERSION_202;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
case FormatUtils::VERSION_4:
return FormatUtils::VERSION_4;
case FormatUtils::VERSION_4_DEV:
return FormatUtils::VERSION_4_DEV;
case FormatUtils::VERSION_402:
return FormatUtils::VERSION_402;
case FormatUtils::VERSION_403:
return FormatUtils::VERSION_403;
default:
return FormatUtils::UNKNOWN_VERSION;
}
@ -247,7 +247,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
// Returns true when mDictFormatVersion is at or above the 402 format version.
// NOTE(review): diff output without +/- markers. The hunk (-247,7 +247,7) changes a
// single line, so the VERSION_4 comparison appears to be the removed form and the
// VERSION_402 comparison its replacement. The FormatUtils enum hunk in this same commit
// gives both names the value 402, so the threshold itself is unchanged.
bool supportsBeginningOfSentence() const {
return mDictFormatVersion >= FormatUtils::VERSION_4;
return mDictFormatVersion >= FormatUtils::VERSION_402;
}
const int *getCodePointTable() const {

View File

@ -115,8 +115,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
// None of the static dictionaries (v2x) support writing
return false;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_DEV:
case FormatUtils::VERSION_402:
case FormatUtils::VERSION_403:
return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:

View File

@ -58,7 +58,7 @@ namespace latinime {
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
switch (dictFormatVersion) {
case FormatUtils::VERSION_4: {
case FormatUtils::VERSION_402: {
return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
@ -66,7 +66,7 @@ namespace latinime {
dictFormatVersion, locale, attributeMap);
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV: {
case FormatUtils::VERSION_403: {
return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
dictFormatVersion, locale, attributeMap);
@ -118,7 +118,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
case FormatUtils::VERSION_202:
AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path);
break;
case FormatUtils::VERSION_4: {
case FormatUtils::VERSION_402: {
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
@ -126,7 +126,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
headerFilePath, formatVersion, std::move(mmappedBuffer));
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV: {
case FormatUtils::VERSION_403: {
return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
headerFilePath, formatVersion, std::move(mmappedBuffer));
@ -184,8 +184,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4:
case FormatUtils::VERSION_4_DEV:
case FormatUtils::VERSION_402:
case FormatUtils::VERSION_403:
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
break;
default:

View File

@ -44,13 +44,13 @@ const int DictFileWritingUtils::SIZE_OF_BUFFER_SIZE_FIELD = 4;
TimeKeeper::setCurrentTime();
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
switch (formatVersion) {
case FormatUtils::VERSION_4:
case FormatUtils::VERSION_402:
return createEmptyV4DictFile<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
case FormatUtils::VERSION_4_DEV:
case FormatUtils::VERSION_403:
return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);

View File

@ -35,10 +35,10 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
return VERSION_202;
case VERSION_4_ONLY_FOR_TESTING:
return VERSION_4_ONLY_FOR_TESTING;
case VERSION_4:
return VERSION_4;
case VERSION_4_DEV:
return VERSION_4_DEV;
case VERSION_402:
return VERSION_402;
case VERSION_403:
return VERSION_403;
default:
return UNKNOWN_VERSION;
}

View File

@ -38,8 +38,8 @@ class FormatUtils {
VERSION_201 = 201,
VERSION_202 = 202,
VERSION_4_ONLY_FOR_TESTING = 399,
VERSION_4 = 402,
VERSION_4_DEV = 403,
VERSION_402 = 402,
VERSION_403 = 403,
UNKNOWN_VERSION = -1
};

View File

@ -62,14 +62,14 @@ TEST(FormatUtilsTest, TestDetectFormatVersion) {
}
{
const std::vector<uint8_t> buffer =
getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4, 0, 0);
EXPECT_EQ(FormatUtils::VERSION_4, FormatUtils::detectFormatVersion(
getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_402, 0, 0);
EXPECT_EQ(FormatUtils::VERSION_402, FormatUtils::detectFormatVersion(
ReadOnlyByteArrayView(buffer.data(), buffer.size())));
}
{
const std::vector<uint8_t> buffer =
getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4_DEV, 0, 0);
EXPECT_EQ(FormatUtils::VERSION_4_DEV, FormatUtils::detectFormatVersion(
getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_403, 0, 0);
EXPECT_EQ(FormatUtils::VERSION_403, FormatUtils::detectFormatVersion(
ReadOnlyByteArrayView(buffer.data(), buffer.size())));
}

View File

@ -49,7 +49,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private static final String TEST_LOCALE = "test";
private static final int DUMMY_PROBABILITY = 0;
private static final int[] DICT_FORMAT_VERSIONS =
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403, FormatSpec.VERSION4_DEV };
private static final String DICTIONARY_ID = "TestDecayingBinaryDictionary";
private int mCurrentTime = 0;
@ -73,11 +73,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
}
// Test helper: true when formatVersion is at or above the 403 format version.
// NOTE(review): diff output without +/- markers. The VERSION4_DEV comparison appears to
// be the removed line and the VERSION403 comparison the added one. The FormatSpec hunk
// of this commit defines VERSION4_DEV as 403 before and as VERSION403 after, so the
// numeric threshold does not change.
private static boolean supportsCountBasedNgram(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
return formatVersion >= FormatSpec.VERSION403;
}
// Test helper: true when formatVersion is at or above the 403 format version.
// NOTE(review): diff output without +/- markers. The VERSION4_DEV comparison appears to
// be the removed line and the VERSION403 comparison the added one; both constants
// resolve to 403 in the FormatSpec hunk of this commit.
private static boolean supportsNgram(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
return formatVersion >= FormatSpec.VERSION403;
}
private void onInputWord(final BinaryDictionary binaryDictionary, final String word,

View File

@ -45,19 +45,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
private static final int[] DICT_FORMAT_VERSIONS =
new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403, FormatSpec.VERSION4_DEV };
private static final String DICTIONARY_ID = "TestBinaryDictionary";
// Test helper: true when formatVersion is strictly greater than FormatSpec.VERSION401.
// NOTE(review): every `if (canCheckBigramProbability(...))` guard shown elsewhere in this
// diff is followed by the same assertion without the guard, and this hunk shrinks
// (-45,19 +45,11), so this helper is presumably deleted by the commit; confirm against
// the post-commit file.
private static boolean canCheckBigramProbability(final int formatVersion) {
return formatVersion > FormatSpec.VERSION401;
}
// Test helper: true when formatVersion is strictly greater than FormatSpec.VERSION401.
// NOTE(review): this hunk shrinks (-45,19 +45,11) and the guarded call in
// testBeginningOfSentence() is shown next to an unguarded one, so this helper is
// presumably deleted by the commit; confirm against the post-commit file.
private static boolean supportsBeginningOfSentence(final int formatVersion) {
return formatVersion > FormatSpec.VERSION401;
}
// Test helper: true when formatVersion is at or above the 403 format version.
// NOTE(review): diff output without +/- markers. The VERSION4_DEV comparison appears to
// be the removed line and the VERSION403 comparison the added one; both constants
// resolve to 403 in the FormatSpec hunk of this commit.
private static boolean supportsNgram(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
return formatVersion >= FormatSpec.VERSION403;
}
private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();
@ -84,19 +76,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Creates an empty dictionary file for the given format version and attribute map by
// calling createEmptyVer4DictionaryAndGetFile(), records the file in
// mDictFilesToBeDeleted, and returns it. An IOException is reported through fail();
// the trailing `return null` is what remains after that call.
// NOTE(review): diff output without +/- markers. The first try/catch, wrapped in the
// version check with its "is not supported" else branch, appears to be the removed
// text, and the second, unguarded try/catch appears to be its replacement. The hunk
// header (-84,19 +76,13) agrees that the method gets shorter.
private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion,
final HashMap<String, String> attributeMap) {
if (formatVersion == FormatSpec.VERSION4
|| formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|| formatVersion == FormatSpec.VERSION4_DEV) {
try {
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
attributeMap);
// Remember the file so it can be deleted later.
mDictFilesToBeDeleted.add(dictFile);
return dictFile;
} catch (final IOException e) {
fail(e.toString());
}
} else {
fail("Dictionary format version " + formatVersion + " is not supported.");
try {
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
attributeMap);
// Remember the file so it can be deleted later.
mDictFilesToBeDeleted.add(dictFile);
return dictFile;
} catch (final IOException e) {
fail(e.toString());
}
return null;
}
@ -350,18 +336,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
}
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
if (canCheckBigramProbability(formatVersion)) {
assertEquals(updatedBigramProbability,
getBigramProbability(binaryDictionary, "aaa", "abb"));
}
assertEquals(updatedBigramProbability,
getBigramProbability(binaryDictionary, "aaa", "abb"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
@ -381,17 +363,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "abc", unigramProbability);
addUnigramWord(binaryDictionary, "f", unigramProbability);
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability,
getBigramProbability(binaryDictionary, "abcde", "fghij"));
}
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij"));
assertEquals(Dictionary.NOT_A_PROBABILITY,
getBigramProbability(binaryDictionary, "abcde", "fgh"));
addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
if (canCheckBigramProbability(formatVersion)) {
assertEquals(updatedBigramProbability,
getBigramProbability(binaryDictionary, "abcde", "fghij"));
}
assertEquals(updatedBigramProbability,
getBigramProbability(binaryDictionary, "abcde", "fghij"));
}
public void testRandomlyAddBigramWords() {
@ -441,10 +418,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramProbability = bigramProbabilities.get(bigram);
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
assertEquals(bigramProbability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
}
@ -594,12 +569,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
}
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
@ -661,10 +634,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramProbability = bigramProbabilities.get(bigram);
assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
if (canCheckBigramProbability(formatVersion)) {
assertEquals(bigramProbability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
assertEquals(bigramProbability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
}
@ -768,10 +739,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
probability = Dictionary.NOT_A_PROBABILITY;
}
if (canCheckBigramProbability(formatVersion)) {
assertEquals(probability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
assertEquals(probability,
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
isValidBigram(binaryDictionary, bigram.first, bigram.second));
}
@ -971,10 +940,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
assertEquals(bigramProbability, bigramTarget.getProbability());
}
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
assertEquals(bigramProbability, bigramTarget.getProbability());
}
}
}
@ -1057,10 +1024,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
final Pair<String, String> bigram = new Pair<>(word0, word1);
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
assertEquals(bigramProbability, bigramTarget.getProbability());
}
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
assertEquals(bigramProbability, bigramTarget.getProbability());
bigramSet.remove(bigram);
}
}
@ -1198,7 +1163,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
public void testPossiblyOffensiveAttributeMaintained() {
final BinaryDictionary binaryDictionary =
getEmptyBinaryDictionary(FormatSpec.VERSION4_DEV);
getEmptyBinaryDictionary(FormatSpec.VERSION403);
binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY,
false, true, true, 0);
WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
@ -1236,11 +1201,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
if (canCheckBigramProbability(toFormatVersion)) {
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
NgramContext.BEGINNING_OF_SENTENCE, "aaa"));
}
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
NgramContext.BEGINNING_OF_SENTENCE, "aaa"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
false /* isBeginningOfSentence */);
@ -1311,10 +1274,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
for (final Pair<String, String> bigram : bigrams) {
if (canCheckBigramProbability(toFormatVersion)) {
assertEquals((int)bigramProbabilities.get(bigram),
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
}
assertEquals((int)bigramProbabilities.get(bigram),
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
}
assertEquals(bigramProbabilities.size(), Integer.parseInt(
@ -1323,9 +1284,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// Runs the per-version testBeginningOfSentence(int) overload for each entry of
// DICT_FORMAT_VERSIONS.
// NOTE(review): diff output without +/- markers. The hunk (-1323,9 +1284,7) drops two
// lines net, so the call guarded by supportsBeginningOfSentence() appears to be the
// removed form and the unguarded call its replacement.
public void testBeginningOfSentence() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
if (supportsBeginningOfSentence(formatVersion)) {
testBeginningOfSentence(formatVersion);
}
testBeginningOfSentence(formatVersion);
}
}