Merge "Add HeaderReaderInterface."
This commit is contained in:
commit
a83e25642f
7 changed files with 165 additions and 93 deletions
|
@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
|||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
||||
import com.android.inputmethod.latin.utils.JniUtils;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -250,7 +251,7 @@ public final class BinaryDictDecoder {
|
|||
/**
|
||||
* Reads a string from a buffer. This is the converse of the above method.
|
||||
*/
|
||||
private static String readString(final FusionDictionaryBufferInterface buffer) {
|
||||
static String readString(final FusionDictionaryBufferInterface buffer) {
|
||||
final StringBuilder s = new StringBuilder();
|
||||
int character = readChar(buffer);
|
||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||
|
@ -629,7 +630,7 @@ public final class BinaryDictDecoder {
|
|||
* @throws UnsupportedFormatException
|
||||
* @throws IOException
|
||||
*/
|
||||
private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
|
||||
static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = getFormatVersion(buffer);
|
||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
|
@ -643,25 +644,22 @@ public final class BinaryDictDecoder {
|
|||
|
||||
/**
|
||||
* Reads a header from a buffer.
|
||||
* @param buffer the buffer to read.
|
||||
* @param headerReader the header reader
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
|
||||
public static FileHeader readHeader(final HeaderReaderInterface headerReader)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = checkFormatVersion(buffer);
|
||||
final int optionsFlags = buffer.readUnsignedShort();
|
||||
final int version = headerReader.readVersion();
|
||||
final int optionsFlags = headerReader.readOptionFlags();
|
||||
|
||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
final int headerSize;
|
||||
headerSize = buffer.readInt();
|
||||
final int headerSize = headerReader.readHeaderSize();
|
||||
|
||||
if (headerSize < 0) {
|
||||
throw new UnsupportedFormatException("header size can't be negative.");
|
||||
}
|
||||
|
||||
populateOptions(buffer, headerSize, attributes);
|
||||
buffer.position(headerSize);
|
||||
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
|
||||
|
||||
final FileHeader header = new FileHeader(headerSize,
|
||||
new FusionDictionary.DictionaryOptions(attributes,
|
||||
|
@ -711,14 +709,14 @@ public final class BinaryDictDecoder {
|
|||
}
|
||||
|
||||
// Read header
|
||||
final FileHeader header = readHeader(reader.getBuffer());
|
||||
final FileHeader fileHeader = readHeader(reader);
|
||||
|
||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
|
||||
reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
|
||||
final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
|
||||
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
|
||||
|
||||
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||
if (null != dict) {
|
||||
for (final Word w : dict) {
|
||||
if (w.mIsBlacklistEntry) {
|
||||
|
|
|
@ -24,13 +24,13 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
|
|||
* Reads unigrams and bigrams from the binary file.
|
||||
* Doesn't store a full memory representation of the dictionary.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param dictReader the dict reader.
|
||||
* @param words the map to store the address as a key and the word as a value.
|
||||
* @param frequencies the map to store the address as a key and the frequency as a value.
|
||||
* @param bigrams the map to store the address as a key and the list of address as a value.
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
|
||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
|
||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
||||
UnsupportedFormatException {
|
||||
// Read header
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(reader.getBuffer());
|
||||
readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words,
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
|
||||
frequencies, bigrams, header.mFormatOptions);
|
||||
}
|
||||
|
||||
|
@ -162,20 +162,20 @@ public final class BinaryDictIOUtils {
|
|||
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
|
||||
* If no match is found, returns NOT_VALID_WORD.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param dictReader the dict reader.
|
||||
* @param word the word we search for.
|
||||
* @return the address of the terminal node.
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static int getTerminalPosition(final BinaryDictReader reader,
|
||||
public static int getTerminalPosition(final BinaryDictReader dictReader,
|
||||
final String word) throws IOException, UnsupportedFormatException {
|
||||
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
if (word == null) return FormatSpec.NOT_VALID_WORD;
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
int wordPos = 0;
|
||||
final int wordLen = word.codePointCount(0, word.length());
|
||||
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
||||
|
@ -510,20 +510,20 @@ public final class BinaryDictIOUtils {
|
|||
/**
|
||||
* Find a word using the BinaryDictReader.
|
||||
*
|
||||
* @param reader the reader
|
||||
* @param dictReader the dict reader
|
||||
* @param word the word searched
|
||||
* @return the found group
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader,
|
||||
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
|
||||
final String word) throws IOException, UnsupportedFormatException {
|
||||
int position = getTerminalPosition(reader, word);
|
||||
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
|
||||
int position = getTerminalPosition(dictReader, word);
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
if (position != FormatSpec.NOT_VALID_WORD) {
|
||||
buffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
buffer.position(position);
|
||||
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
|
||||
}
|
||||
|
@ -544,16 +544,21 @@ public final class BinaryDictIOUtils {
|
|||
final File file, final long offset, final long length)
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
|
||||
final FileInputStream inStream = new FileInputStream(file);
|
||||
try {
|
||||
inStream.read(buffer);
|
||||
final BinaryDictDecoder.ByteBufferWrapper wrapper =
|
||||
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, offset, length));
|
||||
return BinaryDictDecoder.readHeader(wrapper);
|
||||
} finally {
|
||||
inStream.close();
|
||||
}
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
|
||||
@Override
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
final FileInputStream inStream = new FileInputStream(file);
|
||||
try {
|
||||
inStream.read(buffer);
|
||||
return new ByteArrayWrapper(buffer);
|
||||
} finally {
|
||||
inStream.close();
|
||||
}
|
||||
}
|
||||
});
|
||||
return BinaryDictDecoder.readHeader(dictReader);
|
||||
}
|
||||
|
||||
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
|
||||
|
|
|
@ -17,7 +17,9 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -27,8 +29,9 @@ import java.io.IOException;
|
|||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class BinaryDictReader {
|
||||
public class BinaryDictReader implements HeaderReaderInterface {
|
||||
|
||||
public interface FusionDictionaryBufferFactory {
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
||||
|
@ -133,4 +136,34 @@ public class BinaryDictReader {
|
|||
openBuffer(factory);
|
||||
return getBuffer();
|
||||
}
|
||||
|
||||
// The implementation of HeaderReaderInterface
|
||||
// Reads the format version by delegating to BinaryDictDecoder.checkFormatVersion on this
// reader's buffer and returning its result; exceptions from that call propagate unchanged.
@Override
|
||||
public int readVersion() throws IOException, UnsupportedFormatException {
|
||||
return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
|
||||
}
|
||||
|
||||
// Returns the result of readUnsignedShort() on this reader's buffer.
// BinaryDictDecoder.readHeader uses the returned value as the header's option flags.
@Override
|
||||
public int readOptionFlags() {
|
||||
return mFusionDictionaryBuffer.readUnsignedShort();
|
||||
}
|
||||
|
||||
// Returns the result of readInt() on this reader's buffer as the header size.
// No validation is done here; BinaryDictDecoder.readHeader rejects a negative value.
@Override
|
||||
public int readHeaderSize() {
|
||||
return mFusionDictionaryBuffer.readInt();
|
||||
}
|
||||
|
||||
// Reads the header attributes as consecutive (key, value) string pairs from the buffer's
// current position until the position reaches headerSize, then returns them as a map.
// On return the buffer position is set to exactly headerSize.
// A key that appears more than once keeps the last value read (HashMap.put semantics).
@Override
|
||||
public HashMap<String, String> readAttributes(final int headerSize) {
|
||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
while (mFusionDictionaryBuffer.position() < headerSize) {
|
||||
// We can avoid an infinite loop here since mFusionDictionaryBuffer.position() is always
|
||||
// increased by calling CharEncoding.readString.
|
||||
final String key = CharEncoding.readString(mFusionDictionaryBuffer);
|
||||
final String value = CharEncoding.readString(mFusionDictionaryBuffer);
|
||||
attributes.put(key, value);
|
||||
}
|
||||
// Reposition explicitly: the last pair read may have moved the position past headerSize.
mFusionDictionaryBuffer.position(headerSize);
|
||||
return attributes;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,18 +49,18 @@ public final class DynamicBinaryDictIOUtils {
|
|||
/**
|
||||
* Delete the word from the binary file.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param dictReader the dict reader.
|
||||
* @param word the word we delete
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static void deleteWord(final BinaryDictReader reader, final String word)
|
||||
public static void deleteWord(final BinaryDictReader dictReader, final String word)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
buffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
||||
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
|
||||
|
||||
buffer.position(wordPosition);
|
||||
|
@ -236,7 +236,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
/**
|
||||
* Insert a word into a binary dictionary.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param dictReader the dict reader.
|
||||
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
||||
* @param word the word to insert.
|
||||
* @param frequency the frequency of the new word.
|
||||
|
@ -249,16 +249,16 @@ public final class DynamicBinaryDictIOUtils {
|
|||
// TODO: Support batch insertion.
|
||||
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
||||
@UsedForTesting
|
||||
public static void insertWord(final BinaryDictReader reader, final OutputStream destination,
|
||||
public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
|
||||
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
|
||||
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
||||
final boolean isBlackListEntry)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
|
||||
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
if (bigramStrings != null) {
|
||||
for (final WeightedString bigram : bigramStrings) {
|
||||
int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord);
|
||||
int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
|
||||
if (position == FormatSpec.NOT_VALID_WORD) {
|
||||
// TODO: figure out what is the correct thing to do here.
|
||||
} else {
|
||||
|
@ -273,7 +273,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
|
||||
// find the insert position of the word.
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
|
||||
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
|
||||
final int[] codePoints = FusionDictionary.getCodePoints(word);
|
||||
|
@ -289,9 +289,9 @@ public final class DynamicBinaryDictIOUtils {
|
|||
for (int i = 0; i < charGroupCount; ++i) {
|
||||
address = buffer.position();
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
||||
buffer.position(), header.mFormatOptions);
|
||||
buffer.position(), fileHeader.mFormatOptions);
|
||||
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
|
||||
header.mFormatOptions);
|
||||
fileHeader.mFormatOptions);
|
||||
if (isMovedGroup) continue;
|
||||
nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS)
|
||||
? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address;
|
||||
|
@ -311,16 +311,16 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final int newNodeAddress = buffer.limit();
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
|
||||
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
|
||||
false /* isBlackListEntry */, header.mFormatOptions);
|
||||
false /* isBlackListEntry */, fileHeader.mFormatOptions);
|
||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
|
||||
frequency, nodeParentAddress, shortcuts, bigrams, destination,
|
||||
buffer, nodeOriginAddress, address, header.mFormatOptions);
|
||||
buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
|
||||
|
||||
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
|
||||
currentInfo.mCharacters.length);
|
||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
||||
newNodeAddress + written + 1, header.mFormatOptions);
|
||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||
}
|
||||
final CharGroupInfo newInfo2 = new CharGroupInfo(
|
||||
newNodeAddress + written + 1, -1 /* endAddress */,
|
||||
|
@ -352,17 +352,17 @@ public final class DynamicBinaryDictIOUtils {
|
|||
false /* isTerminal */, 0 /* childrenAddressSize*/,
|
||||
false /* hasShortcut */, false /* hasBigrams */,
|
||||
false /* isNotAWord */, false /* isBlackListEntry */,
|
||||
header.mFormatOptions);
|
||||
fileHeader.mFormatOptions);
|
||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
|
||||
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
|
||||
destination, buffer, nodeOriginAddress, address,
|
||||
header.mFormatOptions);
|
||||
fileHeader.mFormatOptions);
|
||||
|
||||
final int[] suffixCharacters = Arrays.copyOfRange(
|
||||
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
|
||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
||||
newNodeAddress + written + 1, header.mFormatOptions);
|
||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||
}
|
||||
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
|
||||
suffixCharacters.length > 1,
|
||||
|
@ -371,21 +371,21 @@ public final class DynamicBinaryDictIOUtils {
|
|||
(currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)
|
||||
!= 0,
|
||||
(currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0,
|
||||
isNotAWord, isBlackListEntry, header.mFormatOptions);
|
||||
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
|
||||
final CharGroupInfo suffixInfo = new CharGroupInfo(
|
||||
newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags,
|
||||
suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1,
|
||||
currentInfo.mChildrenAddress, currentInfo.mShortcutTargets,
|
||||
currentInfo.mBigrams);
|
||||
written += BinaryDictIOUtils.computeGroupSize(suffixInfo,
|
||||
header.mFormatOptions) + 1;
|
||||
fileHeader.mFormatOptions) + 1;
|
||||
|
||||
final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p,
|
||||
codePoints.length);
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(
|
||||
newCharacters.length > 1, isTerminal,
|
||||
0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, header.mFormatOptions);
|
||||
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
|
||||
final CharGroupInfo newInfo = new CharGroupInfo(
|
||||
newNodeAddress + written, -1 /* endAddress */, flags,
|
||||
newCharacters, frequency, newNodeAddress + 1,
|
||||
|
@ -407,13 +407,13 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
|
||||
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, header.mFormatOptions);
|
||||
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
|
||||
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
|
||||
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
|
||||
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
|
||||
bigrams);
|
||||
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
|
||||
header.mFormatOptions);
|
||||
fileHeader.mFormatOptions);
|
||||
return;
|
||||
}
|
||||
wordPos += currentInfo.mCharacters.length;
|
||||
|
@ -432,12 +432,12 @@ public final class DynamicBinaryDictIOUtils {
|
|||
*/
|
||||
final int newNodeAddress = buffer.limit();
|
||||
updateChildrenAddress(buffer, address, newNodeAddress,
|
||||
header.mFormatOptions);
|
||||
fileHeader.mFormatOptions);
|
||||
final int newGroupAddress = newNodeAddress + 1;
|
||||
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
|
||||
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, header.mFormatOptions);
|
||||
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
|
||||
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
|
||||
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
|
||||
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
|
||||
|
@ -482,7 +482,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
|
||||
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, header.mFormatOptions);
|
||||
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
|
||||
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
|
||||
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
|
||||
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict.decoder;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* An interface to read a binary dictionary file header.
|
||||
*/
|
||||
public interface HeaderReaderInterface {
|
||||
// BinaryDictDecoder.readHeader calls the methods below in declaration order:
// readVersion, readOptionFlags, readHeaderSize, then readAttributes.
// NOTE(review): the BinaryDictReader implementation reads sequentially from a shared
// buffer, so the call order presumably matters for implementations -- confirm.

// Reads the format version of the dictionary file.
public int readVersion() throws IOException, UnsupportedFormatException;
|
||||
// Reads the option flags of the header.
public int readOptionFlags();
|
||||
// Reads the header size. The caller (BinaryDictDecoder.readHeader) rejects negative values.
public int readHeaderSize();
|
||||
// Reads the header attributes as key/value strings; headerSize is the value previously
// obtained from readHeaderSize().
public HashMap<String, String> readAttributes(final int headerSize);
|
||||
}
|
|
@ -497,21 +497,21 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
// Tests for getTerminalPosition
|
||||
private String getWordFromBinary(final BinaryDictReader reader, final int address) {
|
||||
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
|
||||
private String getWordFromBinary(final BinaryDictReader dictReader, final int address) {
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
|
||||
FileHeader header = null;
|
||||
FileHeader fileHeader = null;
|
||||
try {
|
||||
header = BinaryDictDecoder.readHeader(buffer);
|
||||
fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
} catch (UnsupportedFormatException e) {
|
||||
return null;
|
||||
}
|
||||
if (header == null) return null;
|
||||
return BinaryDictDecoder.getWordAtAddress(buffer, header.mHeaderSize,
|
||||
address - header.mHeaderSize, header.mFormatOptions).mWord;
|
||||
if (fileHeader == null) return null;
|
||||
return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize,
|
||||
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
|
||||
}
|
||||
|
||||
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
|
||||
|
|
|
@ -128,22 +128,24 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static void printBinaryFile(final FusionDictionaryBufferInterface buffer)
|
||||
private static void printBinaryFile(final BinaryDictReader dictReader)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
while (buffer.position() < buffer.limit()) {
|
||||
printNode(buffer, header.mFormatOptions);
|
||||
printNode(buffer, fileHeader.mFormatOptions);
|
||||
}
|
||||
}
|
||||
|
||||
private int getWordPosition(final File file, final String word) {
|
||||
int position = FormatSpec.NOT_VALID_WORD;
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
|
||||
dictReader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
} finally {
|
||||
|
@ -159,11 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private CharGroupInfo findWordFromFile(final File file, final String word) {
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
CharGroupInfo info = null;
|
||||
try {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
info = BinaryDictIOUtils.findWordByBinaryDictReader(reader, word);
|
||||
dictReader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
|
@ -174,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
private long insertAndCheckWord(final File file, final String word, final int frequency,
|
||||
final boolean exist, final ArrayList<WeightedString> bigrams,
|
||||
final ArrayList<WeightedString> shortcuts) {
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
BufferedOutputStream outStream = null;
|
||||
long amountOfTime = -1;
|
||||
try {
|
||||
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
|
||||
|
||||
if (!exist) {
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||
}
|
||||
final long now = System.nanoTime();
|
||||
DynamicBinaryDictIOUtils.insertWord(reader, outStream, word, frequency, bigrams,
|
||||
DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams,
|
||||
shortcuts, false, false);
|
||||
amountOfTime = System.nanoTime() - now;
|
||||
outStream.flush();
|
||||
|
@ -208,24 +211,25 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void deleteWord(final File file, final String word) {
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
try {
|
||||
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
DynamicBinaryDictIOUtils.deleteWord(reader, word);
|
||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
DynamicBinaryDictIOUtils.deleteWord(dictReader, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
}
|
||||
|
||||
private void checkReverseLookup(final File file, final String word, final int position) {
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
try {
|
||||
final FusionDictionaryBufferInterface buffer = reader.openAndGetBuffer(
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
assertEquals(word,
|
||||
BinaryDictDecoder.getWordAtAddress(reader.getBuffer(), header.mHeaderSize,
|
||||
position - header.mHeaderSize, header.mFormatOptions).mWord);
|
||||
BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(),
|
||||
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
|
||||
fileHeader.mFormatOptions).mWord);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
|
|
Loading…
Reference in a new issue