Merge "Split the header into a separate file."

This commit is contained in:
Jean Chalard 2013-12-11 03:37:05 +00:00 committed by Android (Google) Code Review
commit a8ffdf0288
26 changed files with 155 additions and 121 deletions

View file

@ -36,27 +36,25 @@ public abstract class AbstractDictDecoder implements DictDecoder {
private static final int ERROR_CANNOT_READ = 1;
private static final int ERROR_WRONG_FORMAT = 2;
protected FileHeader readHeader(final DictBuffer dictBuffer)
protected FileHeader readHeader(final DictBuffer headerBuffer)
throws IOException, UnsupportedFormatException {
if (dictBuffer == null) {
if (headerBuffer == null) {
openDictBuffer();
}
final int version = HeaderReader.readVersion(dictBuffer);
final int version = HeaderReader.readVersion(headerBuffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Unsupported version : " + version);
}
// TODO: Remove this field.
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
final int optionsFlags = HeaderReader.readOptionFlags(headerBuffer);
final int headerSize = HeaderReader.readHeaderSize(headerBuffer);
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
final HashMap<String, String> attributes = HeaderReader.readAttributes(headerBuffer,
headerSize);
final FileHeader header = new FileHeader(headerSize,

View file

@ -600,7 +600,7 @@ public final class BinaryDictDecoderUtils {
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize,
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mBodyOffset,
reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);

View file

@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
* Retrieves all node arrays without recursive call.
*/
private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
final int headerSize, final Map<Integer, String> words,
final int bodyOffset, final Map<Integer, String> words,
final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
final FormatOptions formatOptions) {
@ -71,7 +71,7 @@ public final class BinaryDictIOUtils {
Stack<Position> stack = new Stack<Position>();
int index = 0;
Position initPos = new Position(headerSize, 0);
Position initPos = new Position(bodyOffset, 0);
stack.push(initPos);
while (!stack.empty()) {
@ -154,7 +154,7 @@ public final class BinaryDictIOUtils {
UnsupportedFormatException {
// Read header
final FileHeader header = dictDecoder.readHeader();
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words,
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words,
frequencies, bigrams, header.mFormatOptions);
}

View file

@ -199,13 +199,19 @@ public final class FormatSpec {
*/
public static final int MAGIC_NUMBER = 0x9BC13AFE;
static final int MINIMUM_SUPPORTED_VERSION = 2;
static final int MAXIMUM_SUPPORTED_VERSION = 4;
static final int NOT_A_VERSION_NUMBER = -1;
static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
static final int FIRST_VERSION_WITH_TERMINAL_ID = 4;
// These MUST have the same values as the relevant constants in format_utils.h.
// From version 4 on, we use version * 100 + revision as a version number. That allows
// us to change the format during development while having testing devices remove
// older files with each upgrade, while still having a readable versioning scheme.
public static final int VERSION2 = 2;
public static final int VERSION3 = 3;
public static final int VERSION4 = 4;
public static final int VERSION4 = 400;
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
// These options need to be the same numeric values as the one in the native reading code.
static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
@ -263,8 +269,10 @@ public final class FormatSpec {
static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
// These values are used only by version 4 or later.
public static final String TRIE_FILE_EXTENSION = ".trie";
// These values are used only by version 4 or later. They MUST match the definitions in
// ver4_dict_constants.cpp.
static final String TRIE_FILE_EXTENSION = ".trie";
public static final String HEADER_FILE_EXTENSION = ".header";
static final String FREQ_FILE_EXTENSION = ".freq";
static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp";
// tat = Terminal Address Table
@ -361,7 +369,7 @@ public final class FormatSpec {
* Class representing file header.
*/
public static final class FileHeader {
public final int mHeaderSize;
public final int mBodyOffset;
public final DictionaryOptions mDictionaryOptions;
public final FormatOptions mFormatOptions;
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
@ -377,9 +385,9 @@ public final class FormatSpec {
private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description";
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
final FormatOptions formatOptions) {
mHeaderSize = headerSize;
mDictionaryOptions = dictionaryOptions;
mFormatOptions = formatOptions;
mBodyOffset = formatOptions.mVersion < VERSION4 ? headerSize : 0;
if (null == getLocaleString()) {
throw new RuntimeException("Cannot create a FileHeader without a locale");
}

View file

@ -45,10 +45,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
protected static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
protected static final int FILETYPE_BIGRAM_FREQ = 4;
protected static final int FILETYPE_SHORTCUT = 5;
protected static final int FILETYPE_HEADER = 6;
protected final File mDictDirectory;
protected final DictionaryBufferFactory mBufferFactory;
protected DictBuffer mDictBuffer;
protected DictBuffer mHeaderBuffer;
protected DictBuffer mFrequencyBuffer;
protected DictBuffer mTerminalAddressTableBuffer;
private BigramContentReader mBigramReader;
@ -83,7 +85,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
mDictDirectory = dictDirectory;
mDictBuffer = mFrequencyBuffer = null;
mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
@ -100,13 +102,16 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
mDictDirectory = dictDirectory;
mBufferFactory = factory;
mDictBuffer = mFrequencyBuffer = null;
mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
}
protected File getFile(final int fileType) throws UnsupportedFormatException {
if (fileType == FILETYPE_TRIE) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
} else if (fileType == FILETYPE_HEADER) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.HEADER_FILE_EXTENSION);
} else if (fileType == FILETYPE_FREQUENCY) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
@ -132,6 +137,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
if (!mDictDirectory.isDirectory()) {
throw new UnsupportedFormatException("Format 4 dictionary needs a directory");
}
mHeaderBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_HEADER));
mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
@ -149,6 +155,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
return mDictBuffer != null;
}
/**
 * Returns the buffer currently held in {@code mHeaderBuffer}, exposed for tests.
 *
 * The value may be null: the constructors of this class set the field to null, and it is
 * only assigned once the header file buffer has been obtained from the buffer factory.
 *
 * @return the header buffer, or null if it has not been opened yet.
 */
@UsedForTesting
/* package */ DictBuffer getHeaderBuffer() {
return mHeaderBuffer;
}
@UsedForTesting
/* package */ DictBuffer getDictBuffer() {
return mDictBuffer;
@ -156,10 +167,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
@Override
public FileHeader readHeader() throws IOException, UnsupportedFormatException {
if (mDictBuffer == null) {
if (mHeaderBuffer == null) {
openDictBuffer();
}
final FileHeader header = super.readHeader(mDictBuffer);
mHeaderBuffer.position(0);
final FileHeader header = super.readHeader(mHeaderBuffer);
final int version = header.mFormatOptions.mVersion;
if (version != FormatSpec.VERSION4) {
throw new UnsupportedFormatException("File header has a wrong version : " + version);

View file

@ -26,6 +26,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import java.io.File;
import java.io.FileNotFoundException;
@ -45,8 +46,8 @@ public class Ver4DictEncoder implements DictEncoder {
private final File mDictPlacedDir;
private byte[] mTrieBuf;
private int mTriePos;
private int mHeaderSize;
private OutputStream mTrieOutStream;
private OutputStream mHeaderOutStream;
private OutputStream mFreqOutStream;
private OutputStream mUnigramTimestampOutStream;
private OutputStream mTerminalAddressTableOutStream;
@ -185,16 +186,21 @@ public class Ver4DictEncoder implements DictEncoder {
mBaseFilename = header.getId() + "." + header.getVersion();
mDictDir = new File(mDictPlacedDir, mBaseFilename);
final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
final File headerFile = new File(mDictDir,
mBaseFilename + FormatSpec.HEADER_FILE_EXTENSION);
final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
final File timestampFile = new File(mDictDir,
mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
final File terminalAddressTableFile = new File(mDictDir,
mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
if (!mDictDir.isDirectory()) {
if (mDictDir.exists()) mDictDir.delete();
if (mDictDir.exists()) {
FileUtils.deleteRecursively(mDictDir);
}
mDictDir.mkdirs();
}
mTrieOutStream = new FileOutputStream(trieFile);
mHeaderOutStream = new FileOutputStream(headerFile);
mFreqOutStream = new FileOutputStream(freqFile);
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
if (formatOptions.mHasTimestamp) {
@ -207,6 +213,9 @@ public class Ver4DictEncoder implements DictEncoder {
if (mTrieOutStream != null) {
mTrieOutStream.close();
}
if (mHeaderOutStream != null) {
mHeaderOutStream.close();
}
if (mFreqOutStream != null) {
mFreqOutStream.close();
}
@ -218,6 +227,7 @@ public class Ver4DictEncoder implements DictEncoder {
}
} finally {
mTrieOutStream = null;
mHeaderOutStream = null;
mFreqOutStream = null;
mTerminalAddressTableOutStream = null;
}
@ -238,8 +248,7 @@ public class Ver4DictEncoder implements DictEncoder {
openStreams(formatOptions, dict.mOptions);
}
mHeaderSize = BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict,
formatOptions);
BinaryDictEncoderUtils.writeDictionaryHeader(mHeaderOutStream, dict, formatOptions);
MakedictLog.i("Flattening the tree...");
ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
@ -423,7 +432,7 @@ public class Ver4DictEncoder implements DictEncoder {
ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf,
ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
ptNode.mCachedAddressAfterUpdate + mHeaderSize,
ptNode.mCachedAddressAfterUpdate,
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
}
}

View file

@ -122,7 +122,8 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
@Override
protected String getFileNameExtentionToOpenDict() {
return "/" + FormatSpec.TRIE_FILE_EXTENSION;
// TODO: pass the directory name instead
return "/" + FormatSpec.HEADER_FILE_EXTENSION;
}
public void addMultipleDictionaryEntriesToDictionary(

View file

@ -47,7 +47,7 @@ namespace latinime {
case FormatUtils::VERSION_4: {
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
char dictDirPath[dictDirPathBufSize];
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::TRIE_FILE_EXTENSION,
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::HEADER_FILE_EXTENSION,
dictDirPathBufSize, dictDirPath)) {
// Dictionary file name is not valid as a version 4 dictionary.
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);

View file

@ -60,9 +60,8 @@ class SingleDictContent : public DictContent {
}
bool flush(const char *const dictDirPath, const char *const contentFileName) const {
const BufferWithExtendableBuffer *bufferPtr = &mExpandableContentBuffer;
return DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
&bufferPtr, 1 /* bufferCount */);
return DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, contentFileName,
&mExpandableContentBuffer);
}
private:

View file

@ -21,19 +21,16 @@ namespace latinime {
bool SparseTableDictContent::flush(const char *const dictDirPath,
const char *const lookupTableFileName, const char *const addressTableFileName,
const char *const contentFileName) const {
const BufferWithExtendableBuffer *lookupTableBufferPtr = &mExpandableLookupTableBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, lookupTableFileName,
&lookupTableBufferPtr, 1 /* bufferCount */)) {
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, lookupTableFileName,
&mExpandableLookupTableBuffer)){
return false;
}
const BufferWithExtendableBuffer *addressTableBufferPtr = &mExpandableAddressTableBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, addressTableFileName,
&addressTableBufferPtr, 1 /* bufferCount */)) {
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, addressTableFileName,
&mExpandableAddressTableBuffer)) {
return false;
}
const BufferWithExtendableBuffer *contentBufferPtr = &mExpandableContentBuffer;
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
&contentBufferPtr, 1 /* bufferCount */)) {
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, contentFileName,
&mExpandableContentBuffer)) {
return false;
}
return true;

View file

@ -28,7 +28,7 @@ int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId)
const int terminalPos = getBuffer()->readUint(
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
NOT_A_DICT_POS : terminalPos - mHeaderRegionSize;
NOT_A_DICT_POS : terminalPos;
}
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
@ -45,18 +45,16 @@ bool TerminalPositionLookupTable::setTerminalPtNodePosition(
mSize++;
}
const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
terminalPtNodePos + mHeaderRegionSize : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
return getWritableBuffer()->writeUint(terminalPos,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath,
const int newHeaderRegionSize) const {
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
// If header region size has been changed or used buffer size is smaller than actual buffer
// size, regenerate lookup table and write the new table to file.
if (headerRegionSizeDiff != 0 || getEntryPos(mSize) < getBuffer()->getTailPosition()) {
TerminalPositionLookupTable lookupTableToWrite(newHeaderRegionSize);
bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath) const {
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
// table and write the new table to the file.
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
TerminalPositionLookupTable lookupTableToWrite;
for (int i = 0; i < mSize; ++i) {
const int terminalPtNodePosition = getTerminalPtNodePosition(i);
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
@ -68,7 +66,7 @@ bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath,
return lookupTableToWrite.flush(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
} else {
// We can simply use this lookup table because the header region size has not been
// We can simply use this lookup table because the buffer size has not been
// changed.
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
}

View file

@ -28,19 +28,13 @@ class TerminalPositionLookupTable : public SingleDictContent {
public:
typedef hash_map_compat<int, int> TerminalIdMap;
// TODO: Quit using headerRegionSize.
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable,
const int headerRegionSize)
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable)
: SingleDictContent(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
mSize(getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
mHeaderRegionSize(headerRegionSize) {}
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
explicit TerminalPositionLookupTable(const int headerRegionSize)
: mSize(0), mHeaderRegionSize(headerRegionSize) {}
TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
TerminalPositionLookupTable() : mSize(0) {}
int getTerminalPtNodePosition(const int terminalId) const;
@ -50,7 +44,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
return mSize;
}
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const;
bool flushToFile(const char *const dictDirPath) const;
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
@ -62,7 +56,6 @@ class TerminalPositionLookupTable : public SingleDictContent {
}
int mSize;
const int mHeaderRegionSize;
};
} // namespace latinime
#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H

View file

@ -45,16 +45,22 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
return false;
}
// Write header file.
if (!DictFileWritingUtils::flushBufferToFileInDir(tmpDirPath,
Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
AKLOGE("Dictionary header file %s/%s cannot be written.", tmpDirPath,
Ver4DictConstants::HEADER_FILE_EXTENSION);
return false;
}
// Write trie file.
const BufferWithExtendableBuffer *buffers[] = {headerBuffer, &mExpandableTrieBuffer};
if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) {
if (!DictFileWritingUtils::flushBufferToFileInDir(tmpDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION);
return false;
}
// Write dictionary contents.
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath, headerBuffer->getTailPosition())) {
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) {
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
return false;
}

View file

@ -34,9 +34,10 @@ class Ver4DictBuffers {
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
const MmappedBuffer::MmappedBufferPtr &headerBuffer) {
const bool isUpdatable = headerBuffer.get() ? headerBuffer.get()->isUpdatable() : false;
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, headerBuffer, isUpdatable));
}
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
@ -121,16 +122,17 @@ class Ver4DictBuffers {
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
: mDictBuffer(dictBuffer),
mHeaderPolicy(mDictBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable)
: mHeaderBuffer(headerBuffer),
mDictBuffer(MmappedBuffer::openBuffer(dictDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
mHeaderPolicy(headerBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
mExpandableHeaderBuffer(headerBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderPolicy.getSize(),
dictBuffer.get()->getBufferSize() - mHeaderPolicy.getSize(),
mExpandableTrieBuffer(mDictBuffer.get()->getBuffer(),
mDictBuffer.get()->getBufferSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()),
mTerminalPositionLookupTable(dictDirPath, isUpdatable),
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable),
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
@ -139,7 +141,7 @@ class Ver4DictBuffers {
mIsUpdatable(isUpdatable) {}
AK_FORCE_INLINE Ver4DictBuffers(const HeaderPolicy *const headerPolicy)
: mDictBuffer(0), mHeaderPolicy(),
: mHeaderBuffer(0), mDictBuffer(0), mHeaderPolicy(),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(),
@ -147,6 +149,7 @@ class Ver4DictBuffers {
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mExpandableHeaderBuffer;

View file

@ -18,7 +18,9 @@
namespace latinime {
// These values MUST match the definitions in FormatSpec.java.
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
// tat = Terminal Address Table
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";

View file

@ -25,6 +25,7 @@ namespace latinime {
class Ver4DictConstants {
public:
static const char *const TRIE_FILE_EXTENSION;
static const char *const HEADER_FILE_EXTENSION;
static const char *const FREQ_FILE_EXTENSION;
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
static const char *const BIGRAM_FILE_EXTENSION;

View file

@ -68,42 +68,43 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
char tmpFileName[tmpFileNameBufSize];
FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
tmpFileNameBufSize, tmpFileName);
const BufferWithExtendableBuffer *buffers[] = {dictHeader, dictBody};
if (!DictFileWritingUtils::flushBuffersToFile(tmpFileName, buffers, 2 /* bufferCount */)) {
if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictHeader)) {
AKLOGE("Dictionary header cannot be written to %s.", tmpFileName);
return false;
}
if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictBody)) {
AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
return false;
}
if (rename(tmpFileName, filePath) != 0) {
AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);;
return false;
}
return true;
}
/* static */ bool DictFileWritingUtils::flushBuffersToFileInDir(const char *const dirPath,
const char *const fileName, const BufferWithExtendableBuffer **const buffers,
const int bufferCount) {
/* static */ bool DictFileWritingUtils::flushBufferToFileInDir(const char *const dirPath,
const char *const fileName, const BufferWithExtendableBuffer *const buffer) {
const int filePathBufSize = FileUtils::getFilePathBufSize(dirPath, fileName);
char filePath[filePathBufSize];
FileUtils::getFilePath(dirPath, fileName, filePathBufSize, filePath);
return flushBuffersToFile(filePath, buffers, bufferCount);
return flushBufferToFile(filePath, buffer);
}
/* static */ bool DictFileWritingUtils::flushBuffersToFile(const char *const filePath,
const BufferWithExtendableBuffer **const buffers, const int bufferCount) {
/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
const BufferWithExtendableBuffer *const buffer) {
FILE *const file = fopen(filePath, "wb");
if (!file) {
AKLOGE("File %s cannot be opened.", filePath);
ASSERT(false);
return false;
}
for (int i = 0; i < bufferCount; ++i) {
if (!writeBufferToFile(file, buffers[i])) {
remove(filePath);
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
buffers[i]->getTailPosition());
ASSERT(false);
return false;
}
if (!writeBufferToFile(file, buffer)) {
remove(filePath);
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
buffer->getTailPosition());
ASSERT(false);
return false;
}
fclose(file);
return true;

View file

@ -37,8 +37,8 @@ class DictFileWritingUtils {
BufferWithExtendableBuffer *const dictHeader,
BufferWithExtendableBuffer *const dictBody);
static bool flushBuffersToFileInDir(const char *const dirPath, const char *const fileName,
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
static bool flushBufferToFileInDir(const char *const dirPath, const char *const fileName,
const BufferWithExtendableBuffer *const buffer);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
@ -46,8 +46,8 @@ class DictFileWritingUtils {
static bool createEmptyV4DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static bool flushBuffersToFile(const char *const filePath,
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
static bool flushBufferToFile(const char *const filePath,
const BufferWithExtendableBuffer *const buffer);
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);

View file

@ -29,9 +29,10 @@ namespace latinime {
class FormatUtils {
public:
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
VERSION_2 = 2,
VERSION_3 = 3,
VERSION_4 = 4,
VERSION_4 = 400,
UNKNOWN_VERSION = -1
};

View file

@ -22,6 +22,7 @@ import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.utils.FileUtils;
import java.io.File;
import java.io.IOException;
@ -102,7 +103,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException {
if (formatVersion == 4) {
if (formatVersion == FormatSpec.VERSION4) {
return createEmptyVer4DictionaryAndGetFile(dictId);
} else {
throw new IOException("Dictionary format version " + formatVersion
@ -112,7 +113,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
getContext().getCacheDir());
file.delete();
FileUtils.deleteRecursively(file);
file.mkdir();
Map<String, String> attributeMap = new HashMap<String, String>();
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
@ -123,10 +124,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
FormatSpec.VERSION4, attributeMap)) {
return new File(file, FormatSpec.TRIE_FILE_EXTENSION);
return new File(file, FormatSpec.HEADER_FILE_EXTENSION);
} else {
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
+ FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
+ FormatSpec.HEADER_FILE_EXTENSION + " cannot be created.");
}
}

View file

@ -72,10 +72,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
FormatSpec.VERSION4, attributeMap)) {
return new File(file, FormatSpec.TRIE_FILE_EXTENSION);
return new File(file, FormatSpec.HEADER_FILE_EXTENSION);
} else {
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
+ FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
+ FormatSpec.HEADER_FILE_EXTENSION + " cannot be created.");
}
}

View file

@ -523,7 +523,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
return null;
}
if (fileHeader == null) return null;
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
address, fileHeader.mFormatOptions).mWord;
}

View file

@ -216,7 +216,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
final FileHeader fileHeader = dictDecoder.readHeader();
assertEquals(word,
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
position, fileHeader.mFormatOptions).mWord);
} catch (IOException e) {
Log.e(TAG, "Raised an IOException while looking up a word", e);

View file

@ -29,17 +29,18 @@ public class BinaryDictUtils {
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
public static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
public static final FormatSpec.FormatOptions VERSION2 =
new FormatSpec.FormatOptions(FormatSpec.VERSION2);
public static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
new FormatSpec.FormatOptions(FormatSpec.VERSION3, false /* supportsDynamicUpdate */);
public static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
new FormatSpec.FormatOptions(FormatSpec.VERSION3, true /* supportsDynamicUpdate */);
public static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */);
new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* supportsDynamicUpdate */);
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */);
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP =
new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */,
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */,
true /* hasTimestamp */);
public static DictionaryOptions makeDictionaryOptions(final String id, final String version) {
@ -53,9 +54,10 @@ public class BinaryDictUtils {
public static File getDictFile(final String name, final String version,
final FormatOptions formatOptions, final File directory) {
if (formatOptions.mVersion == 2 || formatOptions.mVersion == 3) {
if (formatOptions.mVersion == FormatSpec.VERSION2
|| formatOptions.mVersion == FormatSpec.VERSION3) {
return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION);
} else if (formatOptions.mVersion == 4) {
} else if (formatOptions.mVersion == FormatSpec.VERSION4) {
return new File(directory, name + "." + version);
} else {
throw new RuntimeException("the format option has a wrong version : "
@ -67,7 +69,8 @@ public class BinaryDictUtils {
final File cacheDir) {
if (formatOptions.mVersion == FormatSpec.VERSION4) {
return new Ver4DictEncoder(cacheDir);
} else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) {
} else if (formatOptions.mVersion == FormatSpec.VERSION3
|| formatOptions.mVersion == FormatSpec.VERSION2) {
return new Ver3DictEncoder(file);
} else {
throw new RuntimeException("The format option has a wrong version : "
@ -79,7 +82,7 @@ public class BinaryDictUtils {
throws UnsupportedFormatException {
if (formatOptions.mVersion == FormatSpec.VERSION4) {
return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
} else if (formatOptions.mVersion == 3) {
} else if (formatOptions.mVersion == FormatSpec.VERSION3) {
return new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
} else {
throw new UnsupportedFormatException("The format option has a wrong version : "

View file

@ -30,6 +30,7 @@ MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \

View file

@ -159,9 +159,9 @@ public class DictionaryMaker {
if (OPTION_VERSION_2.equals(arg)) {
// Do nothing, this is the default
} else if (OPTION_VERSION_3.equals(arg)) {
outputBinaryFormatVersion = 3;
outputBinaryFormatVersion = FormatSpec.VERSION3;
} else if (OPTION_VERSION_4.equals(arg)) {
outputBinaryFormatVersion = 4;
outputBinaryFormatVersion = FormatSpec.VERSION4;
} else if (OPTION_HELP.equals(arg)) {
displayHelp();
} else {
@ -358,7 +358,7 @@ public class DictionaryMaker {
final File outputFile = new File(outputFilename);
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
final DictEncoder dictEncoder;
if (version == 4) {
if (version == FormatSpec.VERSION4) {
dictEncoder = new Ver4DictEncoder(outputFile);
} else {
dictEncoder = new Ver3DictEncoder(outputFile);