Add HeaderReaderInterface.

Change-Id: I298f86b70d18cd08b240509b6f757c72e1a59ffe
main
Yuichiro Hanada 2013-08-14 12:03:06 +09:00
parent 606a056b53
commit d794b42f98
7 changed files with 165 additions and 93 deletions

View File

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.JniUtils; import com.android.inputmethod.latin.utils.JniUtils;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
@ -250,7 +251,7 @@ public final class BinaryDictDecoder {
/** /**
* Reads a string from a buffer. This is the converse of the above method. * Reads a string from a buffer. This is the converse of the above method.
*/ */
private static String readString(final FusionDictionaryBufferInterface buffer) { static String readString(final FusionDictionaryBufferInterface buffer) {
final StringBuilder s = new StringBuilder(); final StringBuilder s = new StringBuilder();
int character = readChar(buffer); int character = readChar(buffer);
while (character != FormatSpec.INVALID_CHARACTER) { while (character != FormatSpec.INVALID_CHARACTER) {
@ -629,7 +630,7 @@ public final class BinaryDictDecoder {
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
* @throws IOException * @throws IOException
*/ */
private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final int version = getFormatVersion(buffer); final int version = getFormatVersion(buffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
@ -643,25 +644,22 @@ public final class BinaryDictDecoder {
/** /**
* Reads a header from a buffer. * Reads a header from a buffer.
* @param buffer the buffer to read. * @param headerReader the header reader
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer) public static FileHeader readHeader(final HeaderReaderInterface headerReader)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final int version = checkFormatVersion(buffer); final int version = headerReader.readVersion();
final int optionsFlags = buffer.readUnsignedShort(); final int optionsFlags = headerReader.readOptionFlags();
final HashMap<String, String> attributes = new HashMap<String, String>(); final int headerSize = headerReader.readHeaderSize();
final int headerSize;
headerSize = buffer.readInt();
if (headerSize < 0) { if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative."); throw new UnsupportedFormatException("header size can't be negative.");
} }
populateOptions(buffer, headerSize, attributes); final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
buffer.position(headerSize);
final FileHeader header = new FileHeader(headerSize, final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes, new FusionDictionary.DictionaryOptions(attributes,
@ -711,14 +709,14 @@ public final class BinaryDictDecoder {
} }
// Read header // Read header
final FileHeader header = readHeader(reader.getBuffer()); final FileHeader fileHeader = readHeader(reader);
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
if (null != dict) { if (null != dict) {
for (final Word w : dict) { for (final Word w : dict) {
if (w.mIsBlacklistEntry) { if (w.mIsBlacklistEntry) {

View File

@ -24,13 +24,13 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
* Reads unigrams and bigrams from the binary file. * Reads unigrams and bigrams from the binary file.
* Doesn't store a full memory representation of the dictionary. * Doesn't store a full memory representation of the dictionary.
* *
* @param reader the reader. * @param dictReader the dict reader.
* @param words the map to store the address as a key and the word as a value. * @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value. * @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value. * @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException if the file can't be read. * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized. * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException { UnsupportedFormatException {
// Read header // Read header
final FileHeader header = BinaryDictDecoder.readHeader(reader.getBuffer()); final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words, readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
frequencies, bigrams, header.mFormatOptions); frequencies, bigrams, header.mFormatOptions);
} }
@ -162,20 +162,20 @@ public final class BinaryDictIOUtils {
* Gets the address of the last CharGroup of the exact matching word in the dictionary. * Gets the address of the last CharGroup of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD. * If no match is found, returns NOT_VALID_WORD.
* *
* @param reader the reader. * @param dictReader the dict reader.
* @param word the word we search for. * @param word the word we search for.
* @return the address of the terminal node. * @return the address of the terminal node.
* @throws IOException if the file can't be read. * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized. * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
@UsedForTesting @UsedForTesting
public static int getTerminalPosition(final BinaryDictReader reader, public static int getTerminalPosition(final BinaryDictReader dictReader,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
if (word == null) return FormatSpec.NOT_VALID_WORD; if (word == null) return FormatSpec.NOT_VALID_WORD;
if (buffer.position() != 0) buffer.position(0); if (buffer.position() != 0) buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
int wordPos = 0; int wordPos = 0;
final int wordLen = word.codePointCount(0, word.length()); final int wordLen = word.codePointCount(0, word.length());
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
@ -510,20 +510,20 @@ public final class BinaryDictIOUtils {
/** /**
* Find a word using the BinaryDictReader. * Find a word using the BinaryDictReader.
* *
* @param reader the reader * @param dictReader the dict reader
* @param word the word searched * @param word the word searched
* @return the found group * @return the found group
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader, public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
int position = getTerminalPosition(reader, word); int position = getTerminalPosition(dictReader, word);
final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
if (position != FormatSpec.NOT_VALID_WORD) { if (position != FormatSpec.NOT_VALID_WORD) {
buffer.position(0); buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
buffer.position(position); buffer.position(position);
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions); return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
} }
@ -544,16 +544,21 @@ public final class BinaryDictIOUtils {
final File file, final long offset, final long length) final File file, final long offset, final long length)
throws FileNotFoundException, IOException, UnsupportedFormatException { throws FileNotFoundException, IOException, UnsupportedFormatException {
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
final FileInputStream inStream = new FileInputStream(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
try { dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
inStream.read(buffer); @Override
final BinaryDictDecoder.ByteBufferWrapper wrapper = public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map( throws FileNotFoundException, IOException {
FileChannel.MapMode.READ_ONLY, offset, length)); final FileInputStream inStream = new FileInputStream(file);
return BinaryDictDecoder.readHeader(wrapper); try {
} finally { inStream.read(buffer);
inStream.close(); return new ByteArrayWrapper(buffer);
} } finally {
inStream.close();
}
}
});
return BinaryDictDecoder.readHeader(dictReader);
} }
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,

View File

@ -17,7 +17,9 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.ByteArrayWrapper; import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import java.io.File; import java.io.File;
@ -27,8 +29,9 @@ import java.io.IOException;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.HashMap;
public class BinaryDictReader { public class BinaryDictReader implements HeaderReaderInterface {
public interface FusionDictionaryBufferFactory { public interface FusionDictionaryBufferFactory {
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
@ -133,4 +136,34 @@ public class BinaryDictReader {
openBuffer(factory); openBuffer(factory);
return getBuffer(); return getBuffer();
} }
// The implementation of HeaderReaderInterface
/**
 * Reads the format version of the dictionary from the underlying buffer.
 *
 * The work is delegated to {@code BinaryDictDecoder.checkFormatVersion}, which is also the
 * source of both declared exceptions.
 *
 * @return the version reported by {@code BinaryDictDecoder.checkFormatVersion}.
 * @throws IOException propagated from {@code BinaryDictDecoder.checkFormatVersion}.
 * @throws UnsupportedFormatException propagated from
 *         {@code BinaryDictDecoder.checkFormatVersion}; presumably raised when the version
 *         is outside the supported range.
 */
@Override
public int readVersion() throws IOException, UnsupportedFormatException {
    final int formatVersion = BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
    return formatVersion;
}
/**
 * Reads the option flags of the header from the underlying buffer.
 *
 * @return the result of {@code readUnsignedShort()} on the dictionary buffer.
 */
@Override
public int readOptionFlags() {
    final int optionFlags = mFusionDictionaryBuffer.readUnsignedShort();
    return optionFlags;
}
/**
 * Reads the header size from the underlying buffer.
 *
 * No validation is done here; the value is returned exactly as read.
 *
 * @return the result of {@code readInt()} on the dictionary buffer.
 */
@Override
public int readHeaderSize() {
    final int headerSize = mFusionDictionaryBuffer.readInt();
    return headerSize;
}
/**
 * Reads the header attributes as consecutive key/value string pairs.
 *
 * Pairs are decoded with {@code CharEncoding.readString} for as long as the buffer position
 * is below {@code headerSize}. Before returning, the buffer is positioned exactly at
 * {@code headerSize}, whatever position the last read left it at.
 *
 * @param headerSize the buffer position at which attribute reading stops.
 * @return a map from attribute key to attribute value; when a key occurs more than once the
 *         value read last is kept.
 */
@Override
public HashMap<String, String> readAttributes(final int headerSize) {
    final HashMap<String, String> options = new HashMap<String, String>();
    // The loop is expected to terminate because every CharEncoding.readString call advances
    // mFusionDictionaryBuffer.position(), which is re-read after each pair.
    for (int cursor = mFusionDictionaryBuffer.position(); cursor < headerSize;
            cursor = mFusionDictionaryBuffer.position()) {
        // The key is always read before its value.
        final String name = CharEncoding.readString(mFusionDictionaryBuffer);
        final String content = CharEncoding.readString(mFusionDictionaryBuffer);
        options.put(name, content);
    }
    // Leave the buffer exactly at the end of the header.
    mFusionDictionaryBuffer.position(headerSize);
    return options;
}
} }

View File

@ -49,18 +49,18 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Delete the word from the binary file. * Delete the word from the binary file.
* *
* @param reader the reader. * @param dictReader the dict reader.
* @param word the word we delete * @param word the word we delete
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static void deleteWord(final BinaryDictReader reader, final String word) public static void deleteWord(final BinaryDictReader dictReader, final String word)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
buffer.position(0); buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
if (wordPosition == FormatSpec.NOT_VALID_WORD) return; if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
buffer.position(wordPosition); buffer.position(wordPosition);
@ -236,7 +236,7 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Insert a word into a binary dictionary. * Insert a word into a binary dictionary.
* *
* @param reader the reader. * @param dictReader the dict reader.
* @param destination a stream to the underlying file, with the pointer at the end of the file. * @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert. * @param word the word to insert.
* @param frequency the frequency of the new word. * @param frequency the frequency of the new word.
@ -249,16 +249,16 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion. // TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting @UsedForTesting
public static void insertWord(final BinaryDictReader reader, final OutputStream destination, public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings, final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry) final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
if (bigramStrings != null) { if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) { for (final WeightedString bigram : bigramStrings) {
int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord); int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) { if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here. // TODO: figure out what is the correct thing to do here.
} else { } else {
@ -273,7 +273,7 @@ public final class DynamicBinaryDictIOUtils {
// find the insert position of the word. // find the insert position of the word.
if (buffer.position() != 0) buffer.position(0); if (buffer.position() != 0) buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
final int[] codePoints = FusionDictionary.getCodePoints(word); final int[] codePoints = FusionDictionary.getCodePoints(word);
@ -289,9 +289,9 @@ public final class DynamicBinaryDictIOUtils {
for (int i = 0; i < charGroupCount; ++i) { for (int i = 0; i < charGroupCount; ++i) {
address = buffer.position(); address = buffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
buffer.position(), header.mFormatOptions); buffer.position(), fileHeader.mFormatOptions);
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
header.mFormatOptions); fileHeader.mFormatOptions);
if (isMovedGroup) continue; if (isMovedGroup) continue;
nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS)
? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address;
@ -311,16 +311,16 @@ public final class DynamicBinaryDictIOUtils {
final int newNodeAddress = buffer.limit(); final int newNodeAddress = buffer.limit();
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
false /* isBlackListEntry */, header.mFormatOptions); false /* isBlackListEntry */, fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
frequency, nodeParentAddress, shortcuts, bigrams, destination, frequency, nodeParentAddress, shortcuts, bigrams, destination,
buffer, nodeOriginAddress, address, header.mFormatOptions); buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
currentInfo.mCharacters.length); currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress, updateParentAddresses(buffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final CharGroupInfo newInfo2 = new CharGroupInfo( final CharGroupInfo newInfo2 = new CharGroupInfo(
newNodeAddress + written + 1, -1 /* endAddress */, newNodeAddress + written + 1, -1 /* endAddress */,
@ -352,17 +352,17 @@ public final class DynamicBinaryDictIOUtils {
false /* isTerminal */, 0 /* childrenAddressSize*/, false /* isTerminal */, 0 /* childrenAddressSize*/,
false /* hasShortcut */, false /* hasBigrams */, false /* hasShortcut */, false /* hasBigrams */,
false /* isNotAWord */, false /* isBlackListEntry */, false /* isNotAWord */, false /* isBlackListEntry */,
header.mFormatOptions); fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
destination, buffer, nodeOriginAddress, address, destination, buffer, nodeOriginAddress, address,
header.mFormatOptions); fileHeader.mFormatOptions);
final int[] suffixCharacters = Arrays.copyOfRange( final int[] suffixCharacters = Arrays.copyOfRange(
currentInfo.mCharacters, p, currentInfo.mCharacters.length); currentInfo.mCharacters, p, currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress, updateParentAddresses(buffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
suffixCharacters.length > 1, suffixCharacters.length > 1,
@ -371,21 +371,21 @@ public final class DynamicBinaryDictIOUtils {
(currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)
!= 0, != 0,
(currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0,
isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo suffixInfo = new CharGroupInfo( final CharGroupInfo suffixInfo = new CharGroupInfo(
newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags,
suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1,
currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets,
currentInfo.mBigrams); currentInfo.mBigrams);
written += BinaryDictIOUtils.computeGroupSize(suffixInfo, written += BinaryDictIOUtils.computeGroupSize(suffixInfo,
header.mFormatOptions) + 1; fileHeader.mFormatOptions) + 1;
final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p,
codePoints.length); codePoints.length);
final int flags = BinaryDictEncoder.makeCharGroupFlags( final int flags = BinaryDictEncoder.makeCharGroupFlags(
newCharacters.length > 1, isTerminal, newCharacters.length > 1, isTerminal,
0 /* childrenAddressSize */, hasShortcuts, hasBigrams, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo( final CharGroupInfo newInfo = new CharGroupInfo(
newNodeAddress + written, -1 /* endAddress */, flags, newNodeAddress + written, -1 /* endAddress */, flags,
newCharacters, frequency, newNodeAddress + 1, newCharacters, frequency, newNodeAddress + 1,
@ -407,13 +407,13 @@ public final class DynamicBinaryDictIOUtils {
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency, -1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
bigrams); bigrams);
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
header.mFormatOptions); fileHeader.mFormatOptions);
return; return;
} }
wordPos += currentInfo.mCharacters.length; wordPos += currentInfo.mCharacters.length;
@ -432,12 +432,12 @@ public final class DynamicBinaryDictIOUtils {
*/ */
final int newNodeAddress = buffer.limit(); final int newNodeAddress = buffer.limit();
updateChildrenAddress(buffer, address, newNodeAddress, updateChildrenAddress(buffer, address, newNodeAddress,
header.mFormatOptions); fileHeader.mFormatOptions);
final int newGroupAddress = newNodeAddress + 1; final int newGroupAddress = newNodeAddress + 1;
final boolean hasMultipleChars = (wordLen - wordPos) > 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1;
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
@ -482,7 +482,7 @@ public final class DynamicBinaryDictIOUtils {
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);

View File

@ -0,0 +1,32 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict.decoder;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import java.io.IOException;
import java.util.HashMap;
/**
* An interface to read a binary dictionary file header.
*/
public interface HeaderReaderInterface {
    /**
     * Reads the format version of the dictionary.
     *
     * @return the format version.
     * @throws IOException if the underlying data cannot be read.
     * @throws UnsupportedFormatException if the format is not supported.
     */
    int readVersion() throws IOException, UnsupportedFormatException;

    /**
     * Reads the option flags of the header.
     *
     * @return the option flags.
     */
    int readOptionFlags();

    /**
     * Reads the size of the header.
     *
     * @return the header size as stored; implementations are not required to validate it.
     */
    int readHeaderSize();

    /**
     * Reads the attributes stored in the header.
     *
     * @param headerSize the header size, as returned by {@link #readHeaderSize()}.
     * @return the attributes as a map from key to value.
     */
    HashMap<String, String> readAttributes(int headerSize);
}

View File

@ -493,21 +493,21 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
// Tests for getTerminalPosition // Tests for getTerminalPosition
private String getWordFromBinary(final BinaryDictReader reader, final int address) { private String getWordFromBinary(final BinaryDictReader dictReader, final int address) {
final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
if (buffer.position() != 0) buffer.position(0); if (buffer.position() != 0) buffer.position(0);
FileHeader header = null; FileHeader fileHeader = null;
try { try {
header = BinaryDictDecoder.readHeader(buffer); fileHeader = BinaryDictDecoder.readHeader(dictReader);
} catch (IOException e) { } catch (IOException e) {
return null; return null;
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
return null; return null;
} }
if (header == null) return null; if (fileHeader == null) return null;
return BinaryDictDecoder.getWordAtAddress(buffer, header.mHeaderSize, return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize,
address - header.mHeaderSize, header.mFormatOptions).mWord; address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
} }
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index, private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,

View File

@ -126,22 +126,24 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
} }
private static void printBinaryFile(final FusionDictionaryBufferInterface buffer) private static void printBinaryFile(final BinaryDictReader dictReader)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
while (buffer.position() < buffer.limit()) { while (buffer.position() < buffer.limit()) {
printNode(buffer, header.mFormatOptions); printNode(buffer, fileHeader.mFormatOptions);
} }
} }
private int getWordPosition(final File file, final String word) { private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD; int position = FormatSpec.NOT_VALID_WORD;
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
FileInputStream inStream = null; FileInputStream inStream = null;
try { try {
inStream = new FileInputStream(file); inStream = new FileInputStream(file);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); dictReader.openBuffer(
position = BinaryDictIOUtils.getTerminalPosition(reader, word); new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally { } finally {
@ -157,11 +159,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private CharGroupInfo findWordFromFile(final File file, final String word) { private CharGroupInfo findWordFromFile(final File file, final String word) {
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
CharGroupInfo info = null; CharGroupInfo info = null;
try { try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); dictReader.openBuffer(
info = BinaryDictIOUtils.findWordByBinaryDictReader(reader, word); new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
@ -172,18 +175,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency, private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams, final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) { final ArrayList<WeightedString> shortcuts) {
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
BufferedOutputStream outStream = null; BufferedOutputStream outStream = null;
long amountOfTime = -1; long amountOfTime = -1;
try { try {
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
outStream = new BufferedOutputStream(new FileOutputStream(file, true)); outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) { if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
} }
final long now = System.nanoTime(); final long now = System.nanoTime();
DynamicBinaryDictIOUtils.insertWord(reader, outStream, word, frequency, bigrams, DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams,
shortcuts, false, false); shortcuts, false, false);
amountOfTime = System.nanoTime() - now; amountOfTime = System.nanoTime() - now;
outStream.flush(); outStream.flush();
@ -206,24 +209,25 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private void deleteWord(final File file, final String word) { private void deleteWord(final File file, final String word) {
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
try { try {
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
DynamicBinaryDictIOUtils.deleteWord(reader, word); DynamicBinaryDictIOUtils.deleteWord(dictReader, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
} }
private void checkReverseLookup(final File file, final String word, final int position) { private void checkReverseLookup(final File file, final String word, final int position) {
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader dictReader = new BinaryDictReader(file);
try { try {
final FusionDictionaryBufferInterface buffer = reader.openAndGetBuffer( final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
assertEquals(word, assertEquals(word,
BinaryDictDecoder.getWordAtAddress(reader.getBuffer(), header.mHeaderSize, BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(),
position - header.mHeaderSize, header.mFormatOptions).mWord); fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
fileHeader.mFormatOptions).mWord);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Raised an IOException while looking up a word", e); Log.e(TAG, "Raised an IOException while looking up a word", e);
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {