Merge "Add VERSION4_TEST for testing."
commit
eaa347bc1a
|
@ -540,7 +540,9 @@ public final class BinaryDictionary extends Dictionary {
|
|||
close();
|
||||
final File dictFile = new File(mDictFilePath);
|
||||
final File tmpDictFile = new File(tmpDictFilePath);
|
||||
FileUtils.deleteRecursively(dictFile);
|
||||
if (!FileUtils.deleteRecursively(dictFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -112,8 +112,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
}
|
||||
|
||||
private boolean needsToMigrateDictionary(final int formatVersion) {
|
||||
// TODO: Check version.
|
||||
return false;
|
||||
// When we bump up the dictionary format version, the old version should be added to here
|
||||
// for supporting migration. Note that native code has to support reading such formats.
|
||||
return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING;
|
||||
}
|
||||
|
||||
public boolean isValidDictionaryLocked() {
|
||||
|
|
|
@ -186,7 +186,12 @@ public final class FormatSpec {
|
|||
// From version 4 on, we use version * 100 + revision as a version number. That allows
|
||||
// us to change the format during development while having testing devices remove
|
||||
// older files with each upgrade, while still having a readable versioning scheme.
|
||||
// When we bump up the dictionary format version, we should update
|
||||
// ExpandableDictionary.needsToMigrateDictionary() and
|
||||
// ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
|
||||
public static final int VERSION2 = 2;
|
||||
// Dictionary version used for testing.
|
||||
public static final int VERSION4_ONLY_FOR_TESTING = 399;
|
||||
public static final int VERSION4 = 401;
|
||||
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
|
||||
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
|
||||
|
|
|
@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
switch (mDictFormatVersion) {
|
||||
case FormatUtils::VERSION_2:
|
||||
return FormatUtils::VERSION_2;
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
|
||||
case FormatUtils::VERSION_4:
|
||||
return FormatUtils::VERSION_4;
|
||||
default:
|
||||
|
|
|
@ -98,8 +98,9 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
|
|||
case FormatUtils::VERSION_2:
|
||||
// Version 2 dictionary writing is not supported.
|
||||
return false;
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
case FormatUtils::VERSION_4:
|
||||
return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */,
|
||||
return buffer->writeUintAndAdvancePosition(version /* data */,
|
||||
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -52,9 +52,11 @@ namespace latinime {
|
|||
DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict(
|
||||
const int formatVersion, const std::vector<int> &locale,
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
|
||||
switch (formatVersion) {
|
||||
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
|
||||
switch (dictFormatVersion) {
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
case FormatUtils::VERSION_4: {
|
||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_4, locale, attributeMap);
|
||||
HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap);
|
||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
||||
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
|
||||
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
|
||||
|
@ -87,11 +89,13 @@ namespace latinime {
|
|||
if (!mmappedBuffer) {
|
||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
|
||||
}
|
||||
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
|
||||
mmappedBuffer->getBufferSize())) {
|
||||
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
|
||||
mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
|
||||
switch (formatVersion) {
|
||||
case FormatUtils::VERSION_2:
|
||||
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
|
||||
break;
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
case FormatUtils::VERSION_4: {
|
||||
const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
|
||||
char dictPath[dictDirPathBufSize];
|
||||
|
@ -102,7 +106,8 @@ namespace latinime {
|
|||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
|
||||
}
|
||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
||||
Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer)));
|
||||
Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer),
|
||||
formatVersion));
|
||||
if (!dictBuffers || !dictBuffers->isValid()) {
|
||||
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
|
||||
path);
|
||||
|
@ -135,6 +140,7 @@ namespace latinime {
|
|||
case FormatUtils::VERSION_2:
|
||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
||||
new PatriciaTriePolicy(std::move(mmappedBuffer)));
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
case FormatUtils::VERSION_4:
|
||||
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
|
||||
break;
|
||||
|
|
|
@ -27,7 +27,8 @@
|
|||
namespace latinime {
|
||||
|
||||
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
|
||||
const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer) {
|
||||
const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion) {
|
||||
if (!headerBuffer) {
|
||||
ASSERT(false);
|
||||
AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
|
||||
|
@ -35,7 +36,8 @@ namespace latinime {
|
|||
}
|
||||
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
|
||||
const bool isUpdatable = headerBuffer->isUpdatable();
|
||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable));
|
||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
|
||||
formatVersion));
|
||||
}
|
||||
|
||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
|
@ -113,11 +115,12 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
|||
}
|
||||
|
||||
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
|
||||
MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable)
|
||||
MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion)
|
||||
: mHeaderBuffer(std::move(headerBuffer)),
|
||||
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
|
||||
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
|
||||
mHeaderPolicy(mHeaderBuffer->getBuffer(), FormatUtils::VERSION_4),
|
||||
mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
|
||||
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
|
||||
mHeaderPolicy.getSize(),
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
|
|
|
@ -36,7 +36,8 @@ class Ver4DictBuffers {
|
|||
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
|
||||
|
||||
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
||||
MmappedBuffer::MmappedBufferPtr headerBuffer);
|
||||
MmappedBuffer::MmappedBufferPtr headerBuffer,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||
|
||||
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
||||
const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
|
||||
|
@ -120,7 +121,8 @@ class Ver4DictBuffers {
|
|||
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||
|
||||
Ver4DictBuffers(const char *const dictDirPath,
|
||||
const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable);
|
||||
const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||
|
||||
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
||||
|
||||
|
|
|
@ -34,9 +34,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
const int dictVersion, const std::vector<int> localeAsCodePointVector,
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
switch (dictVersion) {
|
||||
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
|
||||
switch (formatVersion) {
|
||||
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
|
||||
case FormatUtils::VERSION_4:
|
||||
return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap);
|
||||
return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap,
|
||||
formatVersion);
|
||||
default:
|
||||
AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
|
||||
filePath, dictVersion);
|
||||
|
@ -46,8 +49,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
|
||||
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
|
||||
const std::vector<int> localeAsCodePointVector,
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
|
||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_4, localeAsCodePointVector, attributeMap);
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion) {
|
||||
HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap);
|
||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
||||
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
|
||||
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE));
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -46,7 +47,8 @@ class DictFileWritingUtils {
|
|||
|
||||
static bool createEmptyV4DictFile(const char *const filePath,
|
||||
const std::vector<int> localeAsCodePointVector,
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap);
|
||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||
|
||||
static bool flushBufferToFile(const char *const filePath,
|
||||
const BufferWithExtendableBuffer *const buffer);
|
||||
|
|
|
@ -25,6 +25,18 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE;
|
|||
// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12
|
||||
const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
|
||||
|
||||
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
|
||||
switch (formatVersion) {
|
||||
case VERSION_2:
|
||||
return VERSION_2;
|
||||
case VERSION_4_ONLY_FOR_TESTING:
|
||||
return VERSION_4_ONLY_FOR_TESTING;
|
||||
case VERSION_4:
|
||||
return VERSION_4;
|
||||
default:
|
||||
return UNKNOWN_VERSION;
|
||||
}
|
||||
}
|
||||
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
|
||||
const uint8_t *const dict, const int dictSize) {
|
||||
// The magic number is stored big-endian.
|
||||
|
@ -46,6 +58,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
|
|||
// same so we use them for both here.
|
||||
if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
|
||||
return VERSION_2;
|
||||
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) {
|
||||
return VERSION_4_ONLY_FOR_TESTING;
|
||||
} else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
|
||||
return VERSION_4;
|
||||
} else {
|
||||
|
|
|
@ -31,6 +31,7 @@ class FormatUtils {
|
|||
enum FORMAT_VERSION {
|
||||
// These MUST have the same values as the relevant constants in FormatSpec.java.
|
||||
VERSION_2 = 2,
|
||||
VERSION_4_ONLY_FOR_TESTING = 399,
|
||||
VERSION_4 = 401,
|
||||
UNKNOWN_VERSION = -1
|
||||
};
|
||||
|
@ -39,6 +40,7 @@ class FormatUtils {
|
|||
// unsupported or obsolete dictionary formats.
|
||||
static const uint32_t MAGIC_NUMBER;
|
||||
|
||||
static FORMAT_VERSION getFormatVersion(const int formatVersion);
|
||||
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
|
||||
|
||||
private:
|
||||
|
|
|
@ -46,21 +46,23 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||
final int formatVersion) throws IOException {
|
||||
if (formatVersion == FormatSpec.VERSION4) {
|
||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||
if (formatVersion == FormatSpec.VERSION4
|
||||
|| formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) {
|
||||
return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
|
||||
} else {
|
||||
throw new IOException("Dictionary format version " + formatVersion
|
||||
+ " is not supported.");
|
||||
}
|
||||
}
|
||||
|
||||
private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
|
||||
private File createEmptyVer4DictionaryAndGetFile(final String dictId,
|
||||
final int formatVersion) throws IOException {
|
||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
file.delete();
|
||||
file.mkdir();
|
||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
|
||||
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
|
||||
Locale.ENGLISH, attributeMap)) {
|
||||
return file;
|
||||
} else {
|
||||
|
@ -1223,4 +1225,36 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDictMigration() {
|
||||
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4);
|
||||
}
|
||||
|
||||
private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
}
|
||||
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
final int unigramProbability = 100;
|
||||
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
|
||||
addUnigramWord(binaryDictionary, "bbb", unigramProbability);
|
||||
final int bigramProbability = 10;
|
||||
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
|
||||
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
|
||||
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
|
||||
assertTrue(binaryDictionary.isValidDictionary());
|
||||
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
|
||||
// TODO: Add tests for bigram frequency when the implementation gets ready.
|
||||
assertTrue(binaryDictionary.isValidBigram("aaa", "bbb"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue