2013-08-15 10:11:09 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2013 The Android Open Source Project
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package com.android.inputmethod.latin.makedict;
|
|
|
|
|
|
|
|
import com.android.inputmethod.annotations.UsedForTesting;
|
2013-08-19 05:49:57 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
|
|
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
2013-08-20 04:15:27 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
|
|
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
2013-08-22 02:07:52 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
2013-08-20 08:01:47 +00:00
|
|
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-08-23 08:10:57 +00:00
|
|
|
import android.util.Log;
|
|
|
|
|
2013-08-15 10:11:09 +00:00
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileNotFoundException;
|
|
|
|
import java.io.IOException;
|
2013-08-20 08:01:47 +00:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Arrays;
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-08-20 06:52:02 +00:00
|
|
|
/**
|
2013-12-12 06:08:10 +00:00
|
|
|
* An implementation of DictDecoder for version 2 binary dictionaries.
|
2013-08-20 06:52:02 +00:00
|
|
|
*/
|
2013-08-19 05:49:57 +00:00
|
|
|
@UsedForTesting
|
2013-12-12 06:08:10 +00:00
|
|
|
public class Ver2DictDecoder extends AbstractDictDecoder {
|
|
|
|
private static final String TAG = Ver2DictDecoder.class.getSimpleName();
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-10-01 08:59:50 +00:00
|
|
|
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
2013-08-20 12:05:05 +00:00
|
|
|
private static int readFrequency(final DictBuffer dictBuffer) {
|
2013-08-20 08:01:47 +00:00
|
|
|
return dictBuffer.readUnsignedByte();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-30 02:42:52 +00:00
|
|
|
protected final File mDictionaryBinaryFile;
|
2013-08-22 13:43:20 +00:00
|
|
|
private final DictionaryBufferFactory mBufferFactory;
|
2013-09-30 02:42:52 +00:00
|
|
|
protected DictBuffer mDictBuffer;
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-12-12 06:08:10 +00:00
|
|
|
/* package */ Ver2DictDecoder(final File file, final int factoryFlag) {
|
2013-08-19 05:49:57 +00:00
|
|
|
mDictionaryBinaryFile = file;
|
|
|
|
mDictBuffer = null;
|
2013-08-22 13:43:20 +00:00
|
|
|
|
|
|
|
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
|
|
|
|
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
|
|
|
} else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) {
|
|
|
|
mBufferFactory = new DictionaryBufferFromByteArrayFactory();
|
|
|
|
} else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
|
|
|
|
mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
|
|
|
|
} else {
|
|
|
|
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-12 06:08:10 +00:00
|
|
|
/**
 * Creates a decoder for the given file that obtains its buffer from the given factory.
 *
 * @param file the binary dictionary file to decode.
 * @param factory the factory used by {@link #openDictBuffer()} to create the buffer.
 */
/* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) {
    mBufferFactory = factory;
    mDictionaryBinaryFile = file;
    // No buffer is open yet; stated explicitly, as in the flag-based constructor.
    mDictBuffer = null;
}
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-09-12 08:46:09 +00:00
|
|
|
@Override
|
2013-08-22 13:43:20 +00:00
|
|
|
public void openDictBuffer() throws FileNotFoundException, IOException {
|
|
|
|
mDictBuffer = mBufferFactory.getDictionaryBuffer(mDictionaryBinaryFile);
|
2013-08-15 10:11:09 +00:00
|
|
|
}
|
|
|
|
|
2013-09-12 08:46:09 +00:00
|
|
|
@Override
|
2013-08-20 12:05:05 +00:00
|
|
|
public boolean isDictBufferOpen() {
|
2013-09-12 08:46:09 +00:00
|
|
|
return mDictBuffer != null;
|
|
|
|
}
|
|
|
|
|
2013-08-22 13:43:20 +00:00
|
|
|
/* package */ DictBuffer getDictBuffer() {
|
2013-08-19 05:49:57 +00:00
|
|
|
return mDictBuffer;
|
2013-08-15 10:11:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@UsedForTesting
|
2013-08-22 13:43:20 +00:00
|
|
|
/* package */ DictBuffer openAndGetDictBuffer() throws FileNotFoundException, IOException {
|
|
|
|
openDictBuffer();
|
2013-08-19 05:49:57 +00:00
|
|
|
return getDictBuffer();
|
|
|
|
}
|
2013-08-15 10:11:09 +00:00
|
|
|
|
2013-08-20 06:52:02 +00:00
|
|
|
@Override
|
2013-08-20 04:15:27 +00:00
|
|
|
public FileHeader readHeader() throws IOException, UnsupportedFormatException {
|
2013-08-22 13:43:20 +00:00
|
|
|
if (mDictBuffer == null) {
|
|
|
|
openDictBuffer();
|
|
|
|
}
|
2013-08-20 12:05:05 +00:00
|
|
|
final FileHeader header = super.readHeader(mDictBuffer);
|
|
|
|
final int version = header.mFormatOptions.mVersion;
|
|
|
|
if (!(version >= 2 && version <= 3)) {
|
|
|
|
throw new UnsupportedFormatException("File header has a wrong version : " + version);
|
2013-08-19 05:49:57 +00:00
|
|
|
}
|
2013-08-20 04:15:27 +00:00
|
|
|
return header;
|
2013-08-15 10:11:09 +00:00
|
|
|
}
|
2013-08-20 08:01:47 +00:00
|
|
|
|
|
|
|
// TODO: Make this buffer multi thread safe.
|
|
|
|
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
|
|
|
|
@Override
|
2013-08-22 02:07:52 +00:00
|
|
|
public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
|
2013-08-21 06:27:36 +00:00
|
|
|
int addressPointer = ptNodePos;
|
2013-08-20 08:01:47 +00:00
|
|
|
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
2013-08-20 12:05:05 +00:00
|
|
|
addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
|
2013-08-20 08:01:47 +00:00
|
|
|
|
|
|
|
final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
|
|
|
|
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
|
2013-08-20 12:05:05 +00:00
|
|
|
addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
|
2013-08-20 08:01:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
final int characters[];
|
|
|
|
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
|
|
|
|
int index = 0;
|
|
|
|
int character = CharEncoding.readChar(mDictBuffer);
|
|
|
|
addressPointer += CharEncoding.getCharSize(character);
|
2013-08-20 12:05:05 +00:00
|
|
|
while (FormatSpec.INVALID_CHARACTER != character) {
|
2013-08-20 08:01:47 +00:00
|
|
|
// FusionDictionary is making sure that the length of the word is smaller than
|
|
|
|
// MAX_WORD_LENGTH.
|
|
|
|
// So we'll never write past the end of mCharacterBuffer.
|
|
|
|
mCharacterBuffer[index++] = character;
|
|
|
|
character = CharEncoding.readChar(mDictBuffer);
|
|
|
|
addressPointer += CharEncoding.getCharSize(character);
|
|
|
|
}
|
|
|
|
characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
|
|
|
|
} else {
|
|
|
|
final int character = CharEncoding.readChar(mDictBuffer);
|
|
|
|
addressPointer += CharEncoding.getCharSize(character);
|
|
|
|
characters = new int[] { character };
|
|
|
|
}
|
|
|
|
final int frequency;
|
|
|
|
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
|
|
|
|
frequency = PtNodeReader.readFrequency(mDictBuffer);
|
2013-08-20 12:05:05 +00:00
|
|
|
addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE;
|
2013-08-20 08:01:47 +00:00
|
|
|
} else {
|
2013-08-22 02:07:52 +00:00
|
|
|
frequency = PtNode.NOT_A_TERMINAL;
|
2013-08-20 08:01:47 +00:00
|
|
|
}
|
|
|
|
int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
|
|
|
|
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
|
|
|
childrenAddress += addressPointer;
|
|
|
|
}
|
|
|
|
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
2013-08-22 13:01:19 +00:00
|
|
|
final ArrayList<WeightedString> shortcutTargets;
|
2013-08-20 08:01:47 +00:00
|
|
|
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
2013-08-22 13:01:19 +00:00
|
|
|
// readShortcut will add shortcuts to shortcutTargets.
|
|
|
|
shortcutTargets = new ArrayList<WeightedString>();
|
2013-08-20 08:01:47 +00:00
|
|
|
addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
|
2013-08-22 13:01:19 +00:00
|
|
|
} else {
|
|
|
|
shortcutTargets = null;
|
2013-08-20 08:01:47 +00:00
|
|
|
}
|
2013-08-22 13:01:19 +00:00
|
|
|
|
|
|
|
final ArrayList<PendingAttribute> bigrams;
|
2013-08-20 08:01:47 +00:00
|
|
|
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
|
|
|
bigrams = new ArrayList<PendingAttribute>();
|
2013-12-12 06:08:10 +00:00
|
|
|
addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
|
2013-08-20 12:05:05 +00:00
|
|
|
addressPointer);
|
2013-08-22 02:07:52 +00:00
|
|
|
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
2013-10-01 13:00:03 +00:00
|
|
|
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
|
|
|
|
+ " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
|
2013-08-20 08:01:47 +00:00
|
|
|
}
|
2013-08-22 13:01:19 +00:00
|
|
|
} else {
|
|
|
|
bigrams = null;
|
2013-08-20 08:01:47 +00:00
|
|
|
}
|
2013-08-22 02:07:52 +00:00
|
|
|
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
2013-08-20 08:01:47 +00:00
|
|
|
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
|
|
|
}
|
2013-08-22 13:43:20 +00:00
|
|
|
|
|
|
|
@Override
|
2013-09-11 03:08:19 +00:00
|
|
|
public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
|
|
|
|
final boolean deleteDictIfBroken)
|
2013-08-22 13:43:20 +00:00
|
|
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
|
|
|
if (mDictBuffer == null) {
|
|
|
|
openDictBuffer();
|
|
|
|
}
|
2013-08-23 08:10:57 +00:00
|
|
|
try {
|
|
|
|
return BinaryDictDecoderUtils.readDictionaryBinary(this, dict);
|
|
|
|
} catch (IOException e) {
|
|
|
|
Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
|
2013-09-11 03:08:19 +00:00
|
|
|
if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
|
2013-08-23 08:10:57 +00:00
|
|
|
Log.e(TAG, "Failed to delete the broken dictionary.");
|
|
|
|
}
|
|
|
|
throw e;
|
|
|
|
} catch (UnsupportedFormatException e) {
|
|
|
|
Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
|
2013-09-11 03:08:19 +00:00
|
|
|
if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
|
2013-08-23 08:10:57 +00:00
|
|
|
Log.e(TAG, "Failed to delete the broken dictionary.");
|
|
|
|
}
|
|
|
|
throw e;
|
|
|
|
}
|
2013-08-22 13:43:20 +00:00
|
|
|
}
|
2013-08-23 14:23:03 +00:00
|
|
|
|
2013-09-11 09:20:00 +00:00
|
|
|
@Override
|
|
|
|
public void setPosition(int newPos) {
|
|
|
|
mDictBuffer.position(newPos);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public int getPosition() {
|
|
|
|
return mDictBuffer.position();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public int readPtNodeCount() {
|
|
|
|
return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2013-08-20 12:05:05 +00:00
|
|
|
public boolean readAndFollowForwardLink() {
|
2013-09-11 09:20:00 +00:00
|
|
|
final int nextAddress = mDictBuffer.readUnsignedInt24();
|
|
|
|
if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
|
|
|
|
mDictBuffer.position(nextAddress);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean hasNextPtNodeArray() {
|
|
|
|
return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
|
|
|
|
}
|
2013-10-01 08:21:21 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public void skipPtNode(final FormatOptions formatOptions) {
|
|
|
|
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
|
|
|
PtNodeReader.readParentAddress(mDictBuffer, formatOptions);
|
|
|
|
BinaryDictIOUtils.skipString(mDictBuffer,
|
|
|
|
(flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
|
|
|
PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions);
|
|
|
|
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readFrequency(mDictBuffer);
|
|
|
|
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
|
|
|
|
final int shortcutsSize = mDictBuffer.readUnsignedShort();
|
|
|
|
mDictBuffer.position(mDictBuffer.position() + shortcutsSize
|
|
|
|
- FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
|
|
|
|
}
|
|
|
|
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
|
|
|
|
int bigramCount = 0;
|
|
|
|
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
|
|
final int bigramFlags = mDictBuffer.readUnsignedByte();
|
|
|
|
switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
|
|
|
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
|
|
|
|
mDictBuffer.readUnsignedByte();
|
|
|
|
break;
|
|
|
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
|
|
|
|
mDictBuffer.readUnsignedShort();
|
|
|
|
break;
|
|
|
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
|
|
|
|
mDictBuffer.readUnsignedInt24();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break;
|
|
|
|
}
|
|
|
|
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
|
|
throw new RuntimeException("Too many bigrams in a PtNode.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-08-15 10:11:09 +00:00
|
|
|
}
|