Merge "Split the header into a separate file."
This commit is contained in:
commit
a8ffdf0288
26 changed files with 155 additions and 121 deletions
|
@ -36,27 +36,25 @@ public abstract class AbstractDictDecoder implements DictDecoder {
|
||||||
private static final int ERROR_CANNOT_READ = 1;
|
private static final int ERROR_CANNOT_READ = 1;
|
||||||
private static final int ERROR_WRONG_FORMAT = 2;
|
private static final int ERROR_WRONG_FORMAT = 2;
|
||||||
|
|
||||||
protected FileHeader readHeader(final DictBuffer dictBuffer)
|
protected FileHeader readHeader(final DictBuffer headerBuffer)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
if (dictBuffer == null) {
|
if (headerBuffer == null) {
|
||||||
openDictBuffer();
|
openDictBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
final int version = HeaderReader.readVersion(dictBuffer);
|
final int version = HeaderReader.readVersion(headerBuffer);
|
||||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||||
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
||||||
throw new UnsupportedFormatException("Unsupported version : " + version);
|
throw new UnsupportedFormatException("Unsupported version : " + version);
|
||||||
}
|
}
|
||||||
// TODO: Remove this field.
|
// TODO: Remove this field.
|
||||||
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
|
final int optionsFlags = HeaderReader.readOptionFlags(headerBuffer);
|
||||||
|
final int headerSize = HeaderReader.readHeaderSize(headerBuffer);
|
||||||
final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
|
|
||||||
|
|
||||||
if (headerSize < 0) {
|
if (headerSize < 0) {
|
||||||
throw new UnsupportedFormatException("header size can't be negative.");
|
throw new UnsupportedFormatException("header size can't be negative.");
|
||||||
}
|
}
|
||||||
|
|
||||||
final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
|
final HashMap<String, String> attributes = HeaderReader.readAttributes(headerBuffer,
|
||||||
headerSize);
|
headerSize);
|
||||||
|
|
||||||
final FileHeader header = new FileHeader(headerSize,
|
final FileHeader header = new FileHeader(headerSize,
|
||||||
|
|
|
@ -600,7 +600,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
|
|
||||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||||
Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
|
Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
|
||||||
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize,
|
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mBodyOffset,
|
||||||
reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
|
reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
|
||||||
|
|
||||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||||
|
|
|
@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
|
||||||
* Retrieves all node arrays without recursive call.
|
* Retrieves all node arrays without recursive call.
|
||||||
*/
|
*/
|
||||||
private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
|
private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
|
||||||
final int headerSize, final Map<Integer, String> words,
|
final int bodyOffset, final Map<Integer, String> words,
|
||||||
final Map<Integer, Integer> frequencies,
|
final Map<Integer, Integer> frequencies,
|
||||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
|
@ -71,7 +71,7 @@ public final class BinaryDictIOUtils {
|
||||||
Stack<Position> stack = new Stack<Position>();
|
Stack<Position> stack = new Stack<Position>();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
Position initPos = new Position(headerSize, 0);
|
Position initPos = new Position(bodyOffset, 0);
|
||||||
stack.push(initPos);
|
stack.push(initPos);
|
||||||
|
|
||||||
while (!stack.empty()) {
|
while (!stack.empty()) {
|
||||||
|
@ -154,7 +154,7 @@ public final class BinaryDictIOUtils {
|
||||||
UnsupportedFormatException {
|
UnsupportedFormatException {
|
||||||
// Read header
|
// Read header
|
||||||
final FileHeader header = dictDecoder.readHeader();
|
final FileHeader header = dictDecoder.readHeader();
|
||||||
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words,
|
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words,
|
||||||
frequencies, bigrams, header.mFormatOptions);
|
frequencies, bigrams, header.mFormatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -199,13 +199,19 @@ public final class FormatSpec {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static final int MAGIC_NUMBER = 0x9BC13AFE;
|
public static final int MAGIC_NUMBER = 0x9BC13AFE;
|
||||||
static final int MINIMUM_SUPPORTED_VERSION = 2;
|
|
||||||
static final int MAXIMUM_SUPPORTED_VERSION = 4;
|
|
||||||
static final int NOT_A_VERSION_NUMBER = -1;
|
static final int NOT_A_VERSION_NUMBER = -1;
|
||||||
static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
|
static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
|
||||||
static final int FIRST_VERSION_WITH_TERMINAL_ID = 4;
|
static final int FIRST_VERSION_WITH_TERMINAL_ID = 4;
|
||||||
|
|
||||||
|
// These MUST have the same values as the relevant constants in format_utils.h.
|
||||||
|
// From version 4 on, we use version * 100 + revision as a version number. That allows
|
||||||
|
// us to change the format during development while having testing devices remove
|
||||||
|
// older files with each upgrade, while still having a readable versioning scheme.
|
||||||
|
public static final int VERSION2 = 2;
|
||||||
public static final int VERSION3 = 3;
|
public static final int VERSION3 = 3;
|
||||||
public static final int VERSION4 = 4;
|
public static final int VERSION4 = 400;
|
||||||
|
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
|
||||||
|
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
|
||||||
|
|
||||||
// These options need to be the same numeric values as the one in the native reading code.
|
// These options need to be the same numeric values as the one in the native reading code.
|
||||||
static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
|
static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
|
||||||
|
@ -263,8 +269,10 @@ public final class FormatSpec {
|
||||||
static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
|
static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
|
||||||
static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
|
static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
|
||||||
|
|
||||||
// These values are used only by version 4 or later.
|
// These values are used only by version 4 or later. They MUST match the definitions in
|
||||||
public static final String TRIE_FILE_EXTENSION = ".trie";
|
// ver4_dict_constants.cpp.
|
||||||
|
static final String TRIE_FILE_EXTENSION = ".trie";
|
||||||
|
public static final String HEADER_FILE_EXTENSION = ".header";
|
||||||
static final String FREQ_FILE_EXTENSION = ".freq";
|
static final String FREQ_FILE_EXTENSION = ".freq";
|
||||||
static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp";
|
static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp";
|
||||||
// tat = Terminal Address Table
|
// tat = Terminal Address Table
|
||||||
|
@ -361,7 +369,7 @@ public final class FormatSpec {
|
||||||
* Class representing file header.
|
* Class representing file header.
|
||||||
*/
|
*/
|
||||||
public static final class FileHeader {
|
public static final class FileHeader {
|
||||||
public final int mHeaderSize;
|
public final int mBodyOffset;
|
||||||
public final DictionaryOptions mDictionaryOptions;
|
public final DictionaryOptions mDictionaryOptions;
|
||||||
public final FormatOptions mFormatOptions;
|
public final FormatOptions mFormatOptions;
|
||||||
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
|
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
|
||||||
|
@ -377,9 +385,9 @@ public final class FormatSpec {
|
||||||
private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description";
|
private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description";
|
||||||
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
|
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
mHeaderSize = headerSize;
|
|
||||||
mDictionaryOptions = dictionaryOptions;
|
mDictionaryOptions = dictionaryOptions;
|
||||||
mFormatOptions = formatOptions;
|
mFormatOptions = formatOptions;
|
||||||
|
mBodyOffset = formatOptions.mVersion < VERSION4 ? headerSize : 0;
|
||||||
if (null == getLocaleString()) {
|
if (null == getLocaleString()) {
|
||||||
throw new RuntimeException("Cannot create a FileHeader without a locale");
|
throw new RuntimeException("Cannot create a FileHeader without a locale");
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,10 +45,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
protected static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
|
protected static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
|
||||||
protected static final int FILETYPE_BIGRAM_FREQ = 4;
|
protected static final int FILETYPE_BIGRAM_FREQ = 4;
|
||||||
protected static final int FILETYPE_SHORTCUT = 5;
|
protected static final int FILETYPE_SHORTCUT = 5;
|
||||||
|
protected static final int FILETYPE_HEADER = 6;
|
||||||
|
|
||||||
protected final File mDictDirectory;
|
protected final File mDictDirectory;
|
||||||
protected final DictionaryBufferFactory mBufferFactory;
|
protected final DictionaryBufferFactory mBufferFactory;
|
||||||
protected DictBuffer mDictBuffer;
|
protected DictBuffer mDictBuffer;
|
||||||
|
protected DictBuffer mHeaderBuffer;
|
||||||
protected DictBuffer mFrequencyBuffer;
|
protected DictBuffer mFrequencyBuffer;
|
||||||
protected DictBuffer mTerminalAddressTableBuffer;
|
protected DictBuffer mTerminalAddressTableBuffer;
|
||||||
private BigramContentReader mBigramReader;
|
private BigramContentReader mBigramReader;
|
||||||
|
@ -83,7 +85,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
|
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
|
||||||
mDictDirectory = dictDirectory;
|
mDictDirectory = dictDirectory;
|
||||||
mDictBuffer = mFrequencyBuffer = null;
|
mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
|
||||||
|
|
||||||
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
|
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
|
||||||
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
||||||
|
@ -100,13 +102,16 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
|
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
|
||||||
mDictDirectory = dictDirectory;
|
mDictDirectory = dictDirectory;
|
||||||
mBufferFactory = factory;
|
mBufferFactory = factory;
|
||||||
mDictBuffer = mFrequencyBuffer = null;
|
mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected File getFile(final int fileType) throws UnsupportedFormatException {
|
protected File getFile(final int fileType) throws UnsupportedFormatException {
|
||||||
if (fileType == FILETYPE_TRIE) {
|
if (fileType == FILETYPE_TRIE) {
|
||||||
return new File(mDictDirectory,
|
return new File(mDictDirectory,
|
||||||
mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
|
mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
|
||||||
|
} else if (fileType == FILETYPE_HEADER) {
|
||||||
|
return new File(mDictDirectory,
|
||||||
|
mDictDirectory.getName() + FormatSpec.HEADER_FILE_EXTENSION);
|
||||||
} else if (fileType == FILETYPE_FREQUENCY) {
|
} else if (fileType == FILETYPE_FREQUENCY) {
|
||||||
return new File(mDictDirectory,
|
return new File(mDictDirectory,
|
||||||
mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
|
mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
|
||||||
|
@ -132,6 +137,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
if (!mDictDirectory.isDirectory()) {
|
if (!mDictDirectory.isDirectory()) {
|
||||||
throw new UnsupportedFormatException("Format 4 dictionary needs a directory");
|
throw new UnsupportedFormatException("Format 4 dictionary needs a directory");
|
||||||
}
|
}
|
||||||
|
mHeaderBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_HEADER));
|
||||||
mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
|
mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
|
||||||
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
|
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
|
||||||
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
|
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
|
||||||
|
@ -149,6 +155,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
return mDictBuffer != null;
|
return mDictBuffer != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
/* package */ DictBuffer getHeaderBuffer() {
|
||||||
|
return mHeaderBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
/* package */ DictBuffer getDictBuffer() {
|
/* package */ DictBuffer getDictBuffer() {
|
||||||
return mDictBuffer;
|
return mDictBuffer;
|
||||||
|
@ -156,10 +167,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FileHeader readHeader() throws IOException, UnsupportedFormatException {
|
public FileHeader readHeader() throws IOException, UnsupportedFormatException {
|
||||||
if (mDictBuffer == null) {
|
if (mHeaderBuffer == null) {
|
||||||
openDictBuffer();
|
openDictBuffer();
|
||||||
}
|
}
|
||||||
final FileHeader header = super.readHeader(mDictBuffer);
|
mHeaderBuffer.position(0);
|
||||||
|
final FileHeader header = super.readHeader(mHeaderBuffer);
|
||||||
final int version = header.mFormatOptions.mVersion;
|
final int version = header.mFormatOptions.mVersion;
|
||||||
if (version != FormatSpec.VERSION4) {
|
if (version != FormatSpec.VERSION4) {
|
||||||
throw new UnsupportedFormatException("File header has a wrong version : " + version);
|
throw new UnsupportedFormatException("File header has a wrong version : " + version);
|
||||||
|
|
|
@ -26,6 +26,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
import com.android.inputmethod.latin.utils.FileUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
@ -45,8 +46,8 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
private final File mDictPlacedDir;
|
private final File mDictPlacedDir;
|
||||||
private byte[] mTrieBuf;
|
private byte[] mTrieBuf;
|
||||||
private int mTriePos;
|
private int mTriePos;
|
||||||
private int mHeaderSize;
|
|
||||||
private OutputStream mTrieOutStream;
|
private OutputStream mTrieOutStream;
|
||||||
|
private OutputStream mHeaderOutStream;
|
||||||
private OutputStream mFreqOutStream;
|
private OutputStream mFreqOutStream;
|
||||||
private OutputStream mUnigramTimestampOutStream;
|
private OutputStream mUnigramTimestampOutStream;
|
||||||
private OutputStream mTerminalAddressTableOutStream;
|
private OutputStream mTerminalAddressTableOutStream;
|
||||||
|
@ -185,16 +186,21 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
mBaseFilename = header.getId() + "." + header.getVersion();
|
mBaseFilename = header.getId() + "." + header.getVersion();
|
||||||
mDictDir = new File(mDictPlacedDir, mBaseFilename);
|
mDictDir = new File(mDictPlacedDir, mBaseFilename);
|
||||||
final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
|
final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
|
||||||
|
final File headerFile = new File(mDictDir,
|
||||||
|
mBaseFilename + FormatSpec.HEADER_FILE_EXTENSION);
|
||||||
final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
|
final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
|
||||||
final File timestampFile = new File(mDictDir,
|
final File timestampFile = new File(mDictDir,
|
||||||
mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
|
mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
|
||||||
final File terminalAddressTableFile = new File(mDictDir,
|
final File terminalAddressTableFile = new File(mDictDir,
|
||||||
mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||||
if (!mDictDir.isDirectory()) {
|
if (!mDictDir.isDirectory()) {
|
||||||
if (mDictDir.exists()) mDictDir.delete();
|
if (mDictDir.exists()) {
|
||||||
|
FileUtils.deleteRecursively(mDictDir);
|
||||||
|
}
|
||||||
mDictDir.mkdirs();
|
mDictDir.mkdirs();
|
||||||
}
|
}
|
||||||
mTrieOutStream = new FileOutputStream(trieFile);
|
mTrieOutStream = new FileOutputStream(trieFile);
|
||||||
|
mHeaderOutStream = new FileOutputStream(headerFile);
|
||||||
mFreqOutStream = new FileOutputStream(freqFile);
|
mFreqOutStream = new FileOutputStream(freqFile);
|
||||||
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
|
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
|
||||||
if (formatOptions.mHasTimestamp) {
|
if (formatOptions.mHasTimestamp) {
|
||||||
|
@ -207,6 +213,9 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
if (mTrieOutStream != null) {
|
if (mTrieOutStream != null) {
|
||||||
mTrieOutStream.close();
|
mTrieOutStream.close();
|
||||||
}
|
}
|
||||||
|
if (mHeaderOutStream != null) {
|
||||||
|
mHeaderOutStream.close();
|
||||||
|
}
|
||||||
if (mFreqOutStream != null) {
|
if (mFreqOutStream != null) {
|
||||||
mFreqOutStream.close();
|
mFreqOutStream.close();
|
||||||
}
|
}
|
||||||
|
@ -218,6 +227,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
mTrieOutStream = null;
|
mTrieOutStream = null;
|
||||||
|
mHeaderOutStream = null;
|
||||||
mFreqOutStream = null;
|
mFreqOutStream = null;
|
||||||
mTerminalAddressTableOutStream = null;
|
mTerminalAddressTableOutStream = null;
|
||||||
}
|
}
|
||||||
|
@ -238,8 +248,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
openStreams(formatOptions, dict.mOptions);
|
openStreams(formatOptions, dict.mOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
mHeaderSize = BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict,
|
BinaryDictEncoderUtils.writeDictionaryHeader(mHeaderOutStream, dict, formatOptions);
|
||||||
formatOptions);
|
|
||||||
|
|
||||||
MakedictLog.i("Flattening the tree...");
|
MakedictLog.i("Flattening the tree...");
|
||||||
ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
|
ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
|
||||||
|
@ -423,7 +432,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||||
BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf,
|
BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf,
|
||||||
ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
|
ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
|
||||||
ptNode.mCachedAddressAfterUpdate + mHeaderSize,
|
ptNode.mCachedAddressAfterUpdate,
|
||||||
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,7 +122,8 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String getFileNameExtentionToOpenDict() {
|
protected String getFileNameExtentionToOpenDict() {
|
||||||
return "/" + FormatSpec.TRIE_FILE_EXTENSION;
|
// TODO: pass the directory name instead
|
||||||
|
return "/" + FormatSpec.HEADER_FILE_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addMultipleDictionaryEntriesToDictionary(
|
public void addMultipleDictionaryEntriesToDictionary(
|
||||||
|
|
|
@ -47,7 +47,7 @@ namespace latinime {
|
||||||
case FormatUtils::VERSION_4: {
|
case FormatUtils::VERSION_4: {
|
||||||
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
|
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
|
||||||
char dictDirPath[dictDirPathBufSize];
|
char dictDirPath[dictDirPathBufSize];
|
||||||
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::TRIE_FILE_EXTENSION,
|
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::HEADER_FILE_EXTENSION,
|
||||||
dictDirPathBufSize, dictDirPath)) {
|
dictDirPathBufSize, dictDirPath)) {
|
||||||
// Dictionary file name is not valid as a version 4 dictionary.
|
// Dictionary file name is not valid as a version 4 dictionary.
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
|
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
|
||||||
|
|
|
@ -60,9 +60,8 @@ class SingleDictContent : public DictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flush(const char *const dictDirPath, const char *const contentFileName) const {
|
bool flush(const char *const dictDirPath, const char *const contentFileName) const {
|
||||||
const BufferWithExtendableBuffer *bufferPtr = &mExpandableContentBuffer;
|
return DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, contentFileName,
|
||||||
return DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
|
&mExpandableContentBuffer);
|
||||||
&bufferPtr, 1 /* bufferCount */);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -21,19 +21,16 @@ namespace latinime {
|
||||||
bool SparseTableDictContent::flush(const char *const dictDirPath,
|
bool SparseTableDictContent::flush(const char *const dictDirPath,
|
||||||
const char *const lookupTableFileName, const char *const addressTableFileName,
|
const char *const lookupTableFileName, const char *const addressTableFileName,
|
||||||
const char *const contentFileName) const {
|
const char *const contentFileName) const {
|
||||||
const BufferWithExtendableBuffer *lookupTableBufferPtr = &mExpandableLookupTableBuffer;
|
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, lookupTableFileName,
|
||||||
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, lookupTableFileName,
|
&mExpandableLookupTableBuffer)){
|
||||||
&lookupTableBufferPtr, 1 /* bufferCount */)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const BufferWithExtendableBuffer *addressTableBufferPtr = &mExpandableAddressTableBuffer;
|
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, addressTableFileName,
|
||||||
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, addressTableFileName,
|
&mExpandableAddressTableBuffer)) {
|
||||||
&addressTableBufferPtr, 1 /* bufferCount */)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const BufferWithExtendableBuffer *contentBufferPtr = &mExpandableContentBuffer;
|
if (!DictFileWritingUtils::flushBufferToFileInDir(dictDirPath, contentFileName,
|
||||||
if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName,
|
&mExpandableContentBuffer)) {
|
||||||
&contentBufferPtr, 1 /* bufferCount */)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -28,7 +28,7 @@ int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId)
|
||||||
const int terminalPos = getBuffer()->readUint(
|
const int terminalPos = getBuffer()->readUint(
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
||||||
return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
|
return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
|
||||||
NOT_A_DICT_POS : terminalPos - mHeaderRegionSize;
|
NOT_A_DICT_POS : terminalPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
|
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
|
||||||
|
@ -45,18 +45,16 @@ bool TerminalPositionLookupTable::setTerminalPtNodePosition(
|
||||||
mSize++;
|
mSize++;
|
||||||
}
|
}
|
||||||
const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
|
const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
|
||||||
terminalPtNodePos + mHeaderRegionSize : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
|
terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
|
||||||
return getWritableBuffer()->writeUint(terminalPos,
|
return getWritableBuffer()->writeUint(terminalPos,
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath,
|
bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath) const {
|
||||||
const int newHeaderRegionSize) const {
|
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
|
||||||
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
|
// table and write the new table to the file.
|
||||||
// If header region size has been changed or used buffer size is smaller than actual buffer
|
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||||
// size, regenerate lookup table and write the new table to file.
|
TerminalPositionLookupTable lookupTableToWrite;
|
||||||
if (headerRegionSizeDiff != 0 || getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
|
||||||
TerminalPositionLookupTable lookupTableToWrite(newHeaderRegionSize);
|
|
||||||
for (int i = 0; i < mSize; ++i) {
|
for (int i = 0; i < mSize; ++i) {
|
||||||
const int terminalPtNodePosition = getTerminalPtNodePosition(i);
|
const int terminalPtNodePosition = getTerminalPtNodePosition(i);
|
||||||
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
|
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
|
||||||
|
@ -68,7 +66,7 @@ bool TerminalPositionLookupTable::flushToFile(const char *const dictDirPath,
|
||||||
return lookupTableToWrite.flush(dictDirPath,
|
return lookupTableToWrite.flush(dictDirPath,
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||||
} else {
|
} else {
|
||||||
// We can simply use this lookup table because the header region size has not been
|
// We can simply use this lookup table because the buffer size has not been
|
||||||
// changed.
|
// changed.
|
||||||
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,19 +28,13 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
public:
|
public:
|
||||||
typedef hash_map_compat<int, int> TerminalIdMap;
|
typedef hash_map_compat<int, int> TerminalIdMap;
|
||||||
|
|
||||||
// TODO: Quit using headerRegionSize.
|
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable)
|
||||||
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable,
|
|
||||||
const int headerRegionSize)
|
|
||||||
: SingleDictContent(dictDirPath,
|
: SingleDictContent(dictDirPath,
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
||||||
mSize(getBuffer()->getTailPosition()
|
mSize(getBuffer()->getTailPosition()
|
||||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
|
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
||||||
mHeaderRegionSize(headerRegionSize) {}
|
|
||||||
|
|
||||||
explicit TerminalPositionLookupTable(const int headerRegionSize)
|
TerminalPositionLookupTable() : mSize(0) {}
|
||||||
: mSize(0), mHeaderRegionSize(headerRegionSize) {}
|
|
||||||
|
|
||||||
TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
|
|
||||||
|
|
||||||
int getTerminalPtNodePosition(const int terminalId) const;
|
int getTerminalPtNodePosition(const int terminalId) const;
|
||||||
|
|
||||||
|
@ -50,7 +44,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
return mSize;
|
return mSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const;
|
bool flushToFile(const char *const dictDirPath) const;
|
||||||
|
|
||||||
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
|
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
|
||||||
|
|
||||||
|
@ -62,7 +56,6 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
int mSize;
|
int mSize;
|
||||||
const int mHeaderRegionSize;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
|
|
|
@ -45,16 +45,22 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||||
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
|
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Write header file.
|
||||||
|
if (!DictFileWritingUtils::flushBufferToFileInDir(tmpDirPath,
|
||||||
|
Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
|
||||||
|
AKLOGE("Dictionary header file %s/%s cannot be written.", tmpDirPath,
|
||||||
|
Ver4DictConstants::HEADER_FILE_EXTENSION);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// Write trie file.
|
// Write trie file.
|
||||||
const BufferWithExtendableBuffer *buffers[] = {headerBuffer, &mExpandableTrieBuffer};
|
if (!DictFileWritingUtils::flushBufferToFileInDir(tmpDirPath,
|
||||||
if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath,
|
Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
|
||||||
Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) {
|
|
||||||
AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
|
AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
|
||||||
Ver4DictConstants::TRIE_FILE_EXTENSION);
|
Ver4DictConstants::TRIE_FILE_EXTENSION);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write dictionary contents.
|
// Write dictionary contents.
|
||||||
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath, headerBuffer->getTailPosition())) {
|
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) {
|
||||||
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,9 +34,10 @@ class Ver4DictBuffers {
|
||||||
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
|
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
|
||||||
|
|
||||||
static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
||||||
const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
|
const MmappedBuffer::MmappedBufferPtr &headerBuffer) {
|
||||||
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
|
const bool isUpdatable = headerBuffer.get() ? headerBuffer.get()->isUpdatable() : false;
|
||||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
|
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
|
||||||
|
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, headerBuffer, isUpdatable));
|
||||||
}
|
}
|
||||||
|
|
||||||
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
||||||
|
@ -121,16 +122,17 @@ class Ver4DictBuffers {
|
||||||
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||||
|
|
||||||
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
||||||
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
|
const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable)
|
||||||
: mDictBuffer(dictBuffer),
|
: mHeaderBuffer(headerBuffer),
|
||||||
mHeaderPolicy(mDictBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
|
mDictBuffer(MmappedBuffer::openBuffer(dictDirPath,
|
||||||
mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
|
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
|
||||||
|
mHeaderPolicy(headerBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
|
||||||
|
mExpandableHeaderBuffer(headerBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderPolicy.getSize(),
|
mExpandableTrieBuffer(mDictBuffer.get()->getBuffer(),
|
||||||
dictBuffer.get()->getBufferSize() - mHeaderPolicy.getSize(),
|
mDictBuffer.get()->getBufferSize(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
// TODO: Quit using header size.
|
mTerminalPositionLookupTable(dictDirPath, isUpdatable),
|
||||||
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()),
|
|
||||||
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
|
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
|
||||||
isUpdatable),
|
isUpdatable),
|
||||||
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
|
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
|
||||||
|
@ -139,7 +141,7 @@ class Ver4DictBuffers {
|
||||||
mIsUpdatable(isUpdatable) {}
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE Ver4DictBuffers(const HeaderPolicy *const headerPolicy)
|
AK_FORCE_INLINE Ver4DictBuffers(const HeaderPolicy *const headerPolicy)
|
||||||
: mDictBuffer(0), mHeaderPolicy(),
|
: mHeaderBuffer(0), mDictBuffer(0), mHeaderPolicy(),
|
||||||
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mTerminalPositionLookupTable(),
|
mTerminalPositionLookupTable(),
|
||||||
|
@ -147,6 +149,7 @@ class Ver4DictBuffers {
|
||||||
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
|
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
|
||||||
mIsUpdatable(true) {}
|
mIsUpdatable(true) {}
|
||||||
|
|
||||||
|
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mExpandableHeaderBuffer;
|
BufferWithExtendableBuffer mExpandableHeaderBuffer;
|
||||||
|
|
|
@ -18,7 +18,9 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
// These values MUST match the definitions in FormatSpec.java.
|
||||||
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
|
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
|
||||||
|
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
|
||||||
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
|
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
|
||||||
// tat = Terminal Address Table
|
// tat = Terminal Address Table
|
||||||
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
|
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
|
||||||
|
|
|
@ -25,6 +25,7 @@ namespace latinime {
|
||||||
class Ver4DictConstants {
|
class Ver4DictConstants {
|
||||||
public:
|
public:
|
||||||
static const char *const TRIE_FILE_EXTENSION;
|
static const char *const TRIE_FILE_EXTENSION;
|
||||||
|
static const char *const HEADER_FILE_EXTENSION;
|
||||||
static const char *const FREQ_FILE_EXTENSION;
|
static const char *const FREQ_FILE_EXTENSION;
|
||||||
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
|
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
|
||||||
static const char *const BIGRAM_FILE_EXTENSION;
|
static const char *const BIGRAM_FILE_EXTENSION;
|
||||||
|
|
|
@ -68,42 +68,43 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
char tmpFileName[tmpFileNameBufSize];
|
char tmpFileName[tmpFileNameBufSize];
|
||||||
FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
|
FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
|
||||||
tmpFileNameBufSize, tmpFileName);
|
tmpFileNameBufSize, tmpFileName);
|
||||||
const BufferWithExtendableBuffer *buffers[] = {dictHeader, dictBody};
|
if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictHeader)) {
|
||||||
if (!DictFileWritingUtils::flushBuffersToFile(tmpFileName, buffers, 2 /* bufferCount */)) {
|
AKLOGE("Dictionary header cannot be written to %s.", tmpFileName);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictBody)) {
|
||||||
AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
|
AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (rename(tmpFileName, filePath) != 0) {
|
if (rename(tmpFileName, filePath) != 0) {
|
||||||
AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);;
|
AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::flushBuffersToFileInDir(const char *const dirPath,
|
/* static */ bool DictFileWritingUtils::flushBufferToFileInDir(const char *const dirPath,
|
||||||
const char *const fileName, const BufferWithExtendableBuffer **const buffers,
|
const char *const fileName, const BufferWithExtendableBuffer *const buffer) {
|
||||||
const int bufferCount) {
|
|
||||||
const int filePathBufSize = FileUtils::getFilePathBufSize(dirPath, fileName);
|
const int filePathBufSize = FileUtils::getFilePathBufSize(dirPath, fileName);
|
||||||
char filePath[filePathBufSize];
|
char filePath[filePathBufSize];
|
||||||
FileUtils::getFilePath(dirPath, fileName, filePathBufSize, filePath);
|
FileUtils::getFilePath(dirPath, fileName, filePathBufSize, filePath);
|
||||||
return flushBuffersToFile(filePath, buffers, bufferCount);
|
return flushBufferToFile(filePath, buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::flushBuffersToFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
|
||||||
const BufferWithExtendableBuffer **const buffers, const int bufferCount) {
|
const BufferWithExtendableBuffer *const buffer) {
|
||||||
FILE *const file = fopen(filePath, "wb");
|
FILE *const file = fopen(filePath, "wb");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
AKLOGE("File %s cannot be opened.", filePath);
|
AKLOGE("File %s cannot be opened.", filePath);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < bufferCount; ++i) {
|
if (!writeBufferToFile(file, buffer)) {
|
||||||
if (!writeBufferToFile(file, buffers[i])) {
|
remove(filePath);
|
||||||
remove(filePath);
|
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
|
||||||
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
|
buffer->getTailPosition());
|
||||||
buffers[i]->getTailPosition());
|
ASSERT(false);
|
||||||
ASSERT(false);
|
return false;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -37,8 +37,8 @@ class DictFileWritingUtils {
|
||||||
BufferWithExtendableBuffer *const dictHeader,
|
BufferWithExtendableBuffer *const dictHeader,
|
||||||
BufferWithExtendableBuffer *const dictBody);
|
BufferWithExtendableBuffer *const dictBody);
|
||||||
|
|
||||||
static bool flushBuffersToFileInDir(const char *const dirPath, const char *const fileName,
|
static bool flushBufferToFileInDir(const char *const dirPath, const char *const fileName,
|
||||||
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
||||||
|
@ -46,8 +46,8 @@ class DictFileWritingUtils {
|
||||||
static bool createEmptyV4DictFile(const char *const filePath,
|
static bool createEmptyV4DictFile(const char *const filePath,
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||||
|
|
||||||
static bool flushBuffersToFile(const char *const filePath,
|
static bool flushBufferToFile(const char *const filePath,
|
||||||
const BufferWithExtendableBuffer **const buffers, const int bufferCount);
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
|
|
||||||
static bool writeBufferToFile(FILE *const file,
|
static bool writeBufferToFile(FILE *const file,
|
||||||
const BufferWithExtendableBuffer *const buffer);
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
|
|
|
@ -29,9 +29,10 @@ namespace latinime {
|
||||||
class FormatUtils {
|
class FormatUtils {
|
||||||
public:
|
public:
|
||||||
enum FORMAT_VERSION {
|
enum FORMAT_VERSION {
|
||||||
|
// These MUST have the same values as the relevant constants in FormatSpec.java.
|
||||||
VERSION_2 = 2,
|
VERSION_2 = 2,
|
||||||
VERSION_3 = 3,
|
VERSION_3 = 3,
|
||||||
VERSION_4 = 4,
|
VERSION_4 = 400,
|
||||||
UNKNOWN_VERSION = -1
|
UNKNOWN_VERSION = -1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import android.util.Pair;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
|
import com.android.inputmethod.latin.utils.FileUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -102,7 +103,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
|
||||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||||
final int formatVersion) throws IOException {
|
final int formatVersion) throws IOException {
|
||||||
if (formatVersion == 4) {
|
if (formatVersion == FormatSpec.VERSION4) {
|
||||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Dictionary format version " + formatVersion
|
throw new IOException("Dictionary format version " + formatVersion
|
||||||
|
@ -112,7 +113,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
|
private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
|
||||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||||
getContext().getCacheDir());
|
getContext().getCacheDir());
|
||||||
file.delete();
|
FileUtils.deleteRecursively(file);
|
||||||
file.mkdir();
|
file.mkdir();
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||||
|
@ -123,10 +124,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||||
FormatSpec.VERSION4, attributeMap)) {
|
FormatSpec.VERSION4, attributeMap)) {
|
||||||
return new File(file, FormatSpec.TRIE_FILE_EXTENSION);
|
return new File(file, FormatSpec.HEADER_FILE_EXTENSION);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
|
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
|
||||||
+ FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
|
+ FormatSpec.HEADER_FILE_EXTENSION + " cannot be created.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -72,10 +72,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||||
FormatSpec.VERSION4, attributeMap)) {
|
FormatSpec.VERSION4, attributeMap)) {
|
||||||
return new File(file, FormatSpec.TRIE_FILE_EXTENSION);
|
return new File(file, FormatSpec.HEADER_FILE_EXTENSION);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
|
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
|
||||||
+ FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
|
+ FormatSpec.HEADER_FILE_EXTENSION + " cannot be created.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -523,7 +523,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (fileHeader == null) return null;
|
if (fileHeader == null) return null;
|
||||||
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
|
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
|
||||||
address, fileHeader.mFormatOptions).mWord;
|
address, fileHeader.mFormatOptions).mWord;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -216,7 +216,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
|
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
|
||||||
final FileHeader fileHeader = dictDecoder.readHeader();
|
final FileHeader fileHeader = dictDecoder.readHeader();
|
||||||
assertEquals(word,
|
assertEquals(word,
|
||||||
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
|
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
|
||||||
position, fileHeader.mFormatOptions).mWord);
|
position, fileHeader.mFormatOptions).mWord);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
||||||
|
|
|
@ -29,17 +29,18 @@ public class BinaryDictUtils {
|
||||||
|
|
||||||
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||||
|
|
||||||
public static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
|
public static final FormatSpec.FormatOptions VERSION2 =
|
||||||
|
new FormatSpec.FormatOptions(FormatSpec.VERSION2);
|
||||||
public static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
||||||
new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION3, false /* supportsDynamicUpdate */);
|
||||||
public static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
|
||||||
new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION3, true /* supportsDynamicUpdate */);
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
|
||||||
new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* supportsDynamicUpdate */);
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
|
||||||
new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */);
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP =
|
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP =
|
||||||
new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */,
|
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */,
|
||||||
true /* hasTimestamp */);
|
true /* hasTimestamp */);
|
||||||
|
|
||||||
public static DictionaryOptions makeDictionaryOptions(final String id, final String version) {
|
public static DictionaryOptions makeDictionaryOptions(final String id, final String version) {
|
||||||
|
@ -53,9 +54,10 @@ public class BinaryDictUtils {
|
||||||
|
|
||||||
public static File getDictFile(final String name, final String version,
|
public static File getDictFile(final String name, final String version,
|
||||||
final FormatOptions formatOptions, final File directory) {
|
final FormatOptions formatOptions, final File directory) {
|
||||||
if (formatOptions.mVersion == 2 || formatOptions.mVersion == 3) {
|
if (formatOptions.mVersion == FormatSpec.VERSION2
|
||||||
|
|| formatOptions.mVersion == FormatSpec.VERSION3) {
|
||||||
return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION);
|
return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION);
|
||||||
} else if (formatOptions.mVersion == 4) {
|
} else if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
||||||
return new File(directory, name + "." + version);
|
return new File(directory, name + "." + version);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("the format option has a wrong version : "
|
throw new RuntimeException("the format option has a wrong version : "
|
||||||
|
@ -67,7 +69,8 @@ public class BinaryDictUtils {
|
||||||
final File cacheDir) {
|
final File cacheDir) {
|
||||||
if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
||||||
return new Ver4DictEncoder(cacheDir);
|
return new Ver4DictEncoder(cacheDir);
|
||||||
} else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) {
|
} else if (formatOptions.mVersion == FormatSpec.VERSION3
|
||||||
|
|| formatOptions.mVersion == FormatSpec.VERSION2) {
|
||||||
return new Ver3DictEncoder(file);
|
return new Ver3DictEncoder(file);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("The format option has a wrong version : "
|
throw new RuntimeException("The format option has a wrong version : "
|
||||||
|
@ -79,7 +82,7 @@ public class BinaryDictUtils {
|
||||||
throws UnsupportedFormatException {
|
throws UnsupportedFormatException {
|
||||||
if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
||||||
return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
|
return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
|
||||||
} else if (formatOptions.mVersion == 3) {
|
} else if (formatOptions.mVersion == FormatSpec.VERSION3) {
|
||||||
return new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
|
return new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
|
||||||
} else {
|
} else {
|
||||||
throw new UnsupportedFormatException("The format option has a wrong version : "
|
throw new UnsupportedFormatException("The format option has a wrong version : "
|
||||||
|
|
|
@ -30,6 +30,7 @@ MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
|
||||||
USED_TARGETTED_UTILS := \
|
USED_TARGETTED_UTILS := \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
||||||
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
|
||||||
|
|
||||||
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
|
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
|
||||||
|
|
|
@ -159,9 +159,9 @@ public class DictionaryMaker {
|
||||||
if (OPTION_VERSION_2.equals(arg)) {
|
if (OPTION_VERSION_2.equals(arg)) {
|
||||||
// Do nothing, this is the default
|
// Do nothing, this is the default
|
||||||
} else if (OPTION_VERSION_3.equals(arg)) {
|
} else if (OPTION_VERSION_3.equals(arg)) {
|
||||||
outputBinaryFormatVersion = 3;
|
outputBinaryFormatVersion = FormatSpec.VERSION3;
|
||||||
} else if (OPTION_VERSION_4.equals(arg)) {
|
} else if (OPTION_VERSION_4.equals(arg)) {
|
||||||
outputBinaryFormatVersion = 4;
|
outputBinaryFormatVersion = FormatSpec.VERSION4;
|
||||||
} else if (OPTION_HELP.equals(arg)) {
|
} else if (OPTION_HELP.equals(arg)) {
|
||||||
displayHelp();
|
displayHelp();
|
||||||
} else {
|
} else {
|
||||||
|
@ -358,7 +358,7 @@ public class DictionaryMaker {
|
||||||
final File outputFile = new File(outputFilename);
|
final File outputFile = new File(outputFilename);
|
||||||
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
|
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
|
||||||
final DictEncoder dictEncoder;
|
final DictEncoder dictEncoder;
|
||||||
if (version == 4) {
|
if (version == FormatSpec.VERSION4) {
|
||||||
dictEncoder = new Ver4DictEncoder(outputFile);
|
dictEncoder = new Ver4DictEncoder(outputFile);
|
||||||
} else {
|
} else {
|
||||||
dictEncoder = new Ver3DictEncoder(outputFile);
|
dictEncoder = new Ver3DictEncoder(outputFile);
|
||||||
|
|
Loading…
Reference in a new issue