From fb7e08ea8f40ebb4ac57e443f837043e7b57fd87 Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Fri, 28 Sep 2012 17:03:23 +0900 Subject: [PATCH] Add writeCharGroup. bug: 6669677 Change-Id: I36792ba9c511a5148c963096cc93ca8c2e0ee04e --- .../latin/makedict/BinaryDictIOUtils.java | 127 +++++++++++++++++- .../latin/makedict/BinaryDictInputOutput.java | 9 +- 2 files changed, 129 insertions(+), 7 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index c34d19d22..5c6417b04 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -22,9 +22,13 @@ import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictio import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Stack; @@ -251,7 +255,7 @@ public final class BinaryDictIOUtils { buffer.put((byte)newFlags); } - private static void putSInt24(final FusionDictionaryBufferInterface buffer, + private static void writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer, final int value) { final int absValue = Math.abs(value); buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); @@ -259,6 +263,32 @@ public final class BinaryDictIOUtils { buffer.put((byte)(absValue & 0xFF)); } + private static void writeSInt24ToStream(final OutputStream destination, final int value) + throws IOException { + final int absValue = Math.abs(value); + destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); + destination.write((byte)((absValue >> 8) & 0xFF)); + destination.write((byte)(absValue & 0xFF)); + } + + private static void writeVariableAddress(final OutputStream destination, final int value) + throws IOException { + switch (BinaryDictInputOutput.getByteSize(value)) { + case 1: + destination.write((byte)value); + break; + case 2: + destination.write((byte)(0xFF & (value >> 8))); + destination.write((byte)(0xFF & value)); + break; + case 3: + destination.write((byte)(0xFF & (value >> 16))); + destination.write((byte)(0xFF & (value >> 8))); + destination.write((byte)(0xFF & value)); + break; + } + } + /** * Update a parent address in a CharGroup that is addressed by groupOriginAddress. * @@ -277,7 +307,7 @@ public final class BinaryDictIOUtils { } final int flags = buffer.readUnsignedByte(); final int parentOffset = newParentAddress - groupOriginAddress; - putSInt24(buffer, parentOffset); + writeSInt24ToBuffer(buffer, parentOffset); buffer.position(originalPosition); } @@ -293,6 +323,22 @@ public final class BinaryDictIOUtils { } } + private static void writeString(final OutputStream destination, final String word) + throws IOException { + final int length = word.length(); + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (CharEncoding.getCharSize(codePoint) == 1) { + destination.write((byte)codePoint); + } else { + destination.write((byte)(0xFF & (codePoint >> 16))); + destination.write((byte)(0xFF & (codePoint >> 8))); + destination.write((byte)(0xFF & codePoint)); + } + } + destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR); + } + /** * Update a children address in a CharGroup that is addressed by groupOriginAddress. * @@ -312,7 +358,82 @@ public final class BinaryDictIOUtils { if ((FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position(); - putSInt24(buffer, childrenOffset); + writeSInt24ToBuffer(buffer, childrenOffset); buffer.position(originalPosition); } + + /** + * Write a char group to an output stream. + * A char group is an in-memory representation of a node in trie. + * A char group info is an on-disk representation of a node. + * + * @param destination the stream to write. + * @param info the char group info to be written. + */ + public static void writeCharGroup(final OutputStream destination, final CharGroupInfo info) + throws IOException { + destination.write((byte)info.mFlags); + final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ? + FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress; + writeSInt24ToStream(destination, parentOffset); + + for (int i = 0; i < info.mCharacters.length; ++i) { + if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) { + destination.write((byte)info.mCharacters[i]); + } else { + writeSInt24ToStream(destination, info.mCharacters[i]); + } + } + if (info.mCharacters.length > 1) { + destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR); + } + + if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { + destination.write((byte)info.mFrequency); + } + + final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ? + 0 : info.mChildrenAddress - info.mOriginalAddress; + writeSInt24ToStream(destination, childrenOffset); + + if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) { + final int shortcutListSize = + BinaryDictInputOutput.getShortcutListSize(info.mShortcutTargets); + destination.write((byte)(shortcutListSize >> 8)); + destination.write((byte)(shortcutListSize & 0xFF)); + final Iterator shortcutIterator = info.mShortcutTargets.iterator(); + while (shortcutIterator.hasNext()) { + final WeightedString target = shortcutIterator.next(); + destination.write((byte)BinaryDictInputOutput.makeShortcutFlags( + shortcutIterator.hasNext(), target.mFrequency)); + writeString(destination, target.mWord); + } + } + + if (info.mBigrams != null) { + // TODO: Consolidate this code with the code that computes the size of the bigram list + // in BinaryDictionaryInputOutput#computeActualNodeSize + for (int i = 0; i < info.mBigrams.size(); ++i) { + final int bigramOffset = info.mBigrams.get(i).mAddress - info.mOriginalAddress; + final int bigramFrequency = info.mBigrams.get(i).mFrequency; + int bigramFlags = (i < info.mBigrams.size() - 1) + ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0; + bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; + switch (BinaryDictInputOutput.getByteSize(bigramOffset)) { + case 1: + bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; + break; + case 2: + bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; + break; + case 3: + bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; + break; + } + bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY; + destination.write((byte)bigramFlags); + writeVariableAddress(destination, Math.abs(bigramOffset)); + } + } + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 3e6965a17..ba29523c8 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -153,7 +153,7 @@ public final class BinaryDictInputOutput { * @param character the character code. * @return the size in binary encoded-form, either 1 or 3 bytes. */ - private static int getCharSize(final int character) { + static int getCharSize(final int character) { // See char encoding in FusionDictionary.java if (fitsOnOneByte(character)) return 1; if (FormatSpec.INVALID_CHARACTER == character) return 1; @@ -337,7 +337,7 @@ public final class BinaryDictInputOutput { * This is known in advance and does not change according to position in the file * like address lists do. */ - private static int getShortcutListSize(final ArrayList shortcutList) { + static int getShortcutListSize(final ArrayList shortcutList) { if (null == shortcutList) return 0; int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; for (final WeightedString shortcut : shortcutList) { @@ -438,7 +438,7 @@ public final class BinaryDictInputOutput { * @param address the address * @return the byte size. */ - private static int getByteSize(final int address) { + static int getByteSize(final int address) { assert(address <= UINT24_MAX); if (!hasChildrenAddress(address)) { return 0; @@ -858,7 +858,7 @@ public final class BinaryDictInputOutput { * @param frequency the frequency of the attribute, 0..15 * @return the flags */ - private static final int makeShortcutFlags(final boolean more, final int frequency) { + static final int makeShortcutFlags(final boolean more, final int frequency) { return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0) + (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY); } @@ -896,6 +896,7 @@ public final class BinaryDictInputOutput { */ private static int writePlacedNode(final FusionDictionary dict, byte[] buffer, final Node node, final FormatOptions formatOptions) { + // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup int index = node.mCachedAddress; final int groupCount = node.mData.size();