diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
new file mode 100644
index 000000000..2963e3771
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.util.Log;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.PendingAttribute;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Reads and writes Binary files for a UserHistoryDictionary.
+ *
+ * All the methods in this class are static.
+ */
+public class UserHistoryDictIOUtils {
+    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
+    private static final boolean DEBUG = false;
+
+    private UserHistoryDictIOUtils() {
+        // This utility class is not publicly instantiable.
+    }
+
+    /**
+     * Receives the unigrams and bigrams found by readDictionaryBinary.
+     */
+    public interface OnAddWordListener {
+        public void setUnigram(final String word, final String shortcutTarget, final int frequency);
+        public void setBigram(final String word1, final String word2, final int frequency);
+    }
+
+    /**
+     * Supplies the frequency of each entry written by writeDictionaryBinary.
+     * A null word1 asks for the frequency of the unigram word2.
+     */
+    public interface BigramDictionaryInterface {
+        public int getFrequency(final String word1, final String word2);
+    }
+
+    /**
+     * A FusionDictionaryBufferInterface that reads from a byte array.
+     *
+     * Multi-byte values are read most significant byte first. Positions are not validated
+     * by this class.
+     */
+    public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
+        private final byte[] mBuffer;
+        private int mPosition;
+
+        ByteArrayWrapper(final byte[] buffer) {
+            mBuffer = buffer;
+            mPosition = 0;
+        }
+
+        @Override
+        public int readUnsignedByte() {
+            return ((int)mBuffer[mPosition++]) & 0xFF;
+        }
+
+        @Override
+        public int readUnsignedShort() {
+            final int retval = readUnsignedByte();
+            return (retval << 8) + readUnsignedByte();
+        }
+
+        @Override
+        public int readUnsignedInt24() {
+            final int retval = readUnsignedShort();
+            return (retval << 8) + readUnsignedByte();
+        }
+
+        @Override
+        public int readInt() {
+            final int retval = readUnsignedShort();
+            return (retval << 16) + readUnsignedShort();
+        }
+
+        @Override
+        public int position() {
+            return mPosition;
+        }
+
+        @Override
+        public void position(int position) {
+            mPosition = position;
+        }
+    }
+
+    /**
+     * Writes dictionary to file.
+     *
+     * Errors are logged and not propagated to the caller.
+     *
+     * @param destination the stream to write the dictionary to.
+     * @param dict the source of the frequency of each entry.
+     * @param bigrams the entries to write. A null first word denotes a unigram.
+     * @param version the format version, passed on to BinaryDictInputOutput.
+     */
+    public static void writeDictionaryBinary(final OutputStream destination,
+            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
+            final int version) {
+
+        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
+
+        try {
+            BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
+        } catch (IOException e) {
+            Log.e(TAG, "IO exception while writing file: " + e);
+        } catch (UnsupportedFormatException e) {
+            Log.e(TAG, "Unsupported format: " + e);
+        }
+    }
+
+    /**
+     * Constructs a new FusionDictionary from BigramDictionaryInterface.
+     *
+     * Each entry of the list is also updated, through updateBigram, with the frequency
+     * obtained from dict cast to a byte.
+     *
+     * @param dict the source of the frequency of each entry.
+     * @param bigrams the entries to add. A null first word denotes a unigram.
+     * @return the new dictionary.
+     */
+    /* package for test */ static FusionDictionary constructFusionDictionary(
+            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
+
+        final FusionDictionary fusionDict = new FusionDictionary(new Node(),
+                new FusionDictionary.DictionaryOptions(
+                        new HashMap<String, String>(), false, false));
+
+        // NOTE(review): bigrams are added in the key order of the list, so a bigram may be
+        // reached before the unigram of its first word. Confirm that setBigram copes.
+        for (final String word1 : bigrams.keySet()) {
+            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
+            for (final String word2 : word1Bigrams.keySet()) {
+                final int freq = dict.getFrequency(word1, word2);
+
+                if (DEBUG) {
+                    if (word1 == null) {
+                        Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
+                    } else {
+                        Log.d(TAG, "add bigram: " + word1
+                                + "," + word2 + "," + Integer.toString(freq));
+                    }
+                }
+
+                if (word1 == null) { // unigram
+                    fusionDict.add(word2, freq, null);
+                } else { // bigram
+                    fusionDict.setBigram(word1, word2, freq);
+                }
+                bigrams.updateBigram(word1, word2, (byte)freq);
+            }
+        }
+
+        return fusionDict;
+    }
+
+    /**
+     * Reads dictionary from file.
+     *
+     * Errors are logged and not propagated to the caller.
+     *
+     * @param buffer the buffer holding the binary dictionary.
+     * @param dict the listener that receives every unigram and bigram read.
+     */
+    public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
+            final OnAddWordListener dict) {
+        final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
+        final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
+        final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
+
+        try {
+            BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
+                    bigrams);
+            addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
+        } catch (IOException e) {
+            Log.e(TAG, "IO exception while reading file: " + e);
+        } catch (UnsupportedFormatException e) {
+            Log.e(TAG, "Unsupported format: " + e);
+        }
+    }
+
+    /**
+     * Adds all unigrams and bigrams in maps to OnAddWordListener.
+     *
+     * @param unigrams the words, keyed by address.
+     * @param frequencies the unigram frequencies, keyed like unigrams.
+     * @param bigrams the bigram attributes of each first word, keyed like unigrams.
+     * @param to the listener to add the words to.
+     */
+    /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
+            final Map<Integer, Integer> frequencies,
+            final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
+
+        for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
+            final String word1 = entry.getValue();
+            final int unigramFrequency = frequencies.get(entry.getKey());
+            to.setUnigram(word1, null, unigramFrequency);
+
+            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
+
+            if (attrList != null) {
+                for (final PendingAttribute attr : attrList) {
+                    to.setBigram(word1, unigrams.get(attr.mAddress),
+                            BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
+                                    attr.mFrequency));
+                }
+            }
+        }
+
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 9f7f41331..bb7d1b2d2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -189,7 +189,7 @@ public class BinaryDictInputOutput {
     // suspicion that a bug might be causing an infinite loop.
private static final int MAX_PASSES = 24; - private interface FusionDictionaryBufferInterface { + public interface FusionDictionaryBufferInterface { public int readUnsignedByte(); public int readUnsignedShort(); public int readUnsignedInt24(); @@ -234,7 +234,6 @@ public class BinaryDictInputOutput { @Override public void position(int newPos) { mBuffer.position(newPos); - return; } } @@ -1393,7 +1392,6 @@ public class BinaryDictInputOutput { final FusionDictionaryBufferInterface buffer, final int headerSize, final Map words, final Map frequencies, final Map> bigrams) { - int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack stack = new Stack(); @@ -1443,8 +1441,6 @@ public class BinaryDictInputOutput { stack.push(childrenPos); } } - - return; } /** @@ -1462,7 +1458,6 @@ public class BinaryDictInputOutput { final Map words, final Map frequencies, final Map> bigrams) throws IOException, UnsupportedFormatException { - // Read header final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); @@ -1507,10 +1502,8 @@ public class BinaryDictInputOutput { * @throws UnsupportedFormatException */ private static int readHeader(final FusionDictionaryBufferInterface buffer, - final HashMap options, - final int version) + final HashMap options, final int version) throws IOException, UnsupportedFormatException { - final int headerSize; if (version < FIRST_VERSION_WITH_HEADER_SIZE) { headerSize = buffer.position(); @@ -1523,7 +1516,6 @@ public class BinaryDictInputOutput { if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } - return headerSize; } @@ -1561,7 +1553,6 @@ public class BinaryDictInputOutput { public static FusionDictionary readDictionaryBinary( final FusionDictionaryBufferInterface buffer, final FusionDictionary dict) throws IOException, UnsupportedFormatException { - // clear cache wordCache.clear(); diff --git 
a/tests/src/com/android/inputmethod/latin/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/UserHistoryDictIOUtilsTests.java
new file mode 100644
index 000000000..8f0551b4c
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/UserHistoryDictIOUtilsTests.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
+import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+
+import android.content.Context;
+import android.test.AndroidTestCase;
+import android.util.Log;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+
+/**
+ * Unit tests for UserHistoryDictIOUtils
+ */
+public class UserHistoryDictIOUtilsTests extends AndroidTestCase
+    implements BigramDictionaryInterface {
+
+    private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName();
+    private static final int UNIGRAM_FREQUENCY = 50;
+    private static final int BIGRAM_FREQUENCY = 100;
+
+    /**
+     * Return same frequency for all words and bigrams
+     */
+    @Override
+    public int getFrequency(String word1, String word2) {
+        if (word1 == null) return UNIGRAM_FREQUENCY;
+        return BIGRAM_FREQUENCY;
+    }
+
+    // Utilities for Testing
+
+    /** Registers a word, with no bigrams yet, if it is not registered already. */
+    private void addWord(final String word,
+            final HashMap<String, ArrayList<String>> addedWords) {
+        if (!addedWords.containsKey(word)) {
+            addedWords.put(word, new ArrayList<String>());
+        }
+    }
+
+    /** Registers both words and records word2 as a bigram of word1. */
+    private void addBigram(final String word1, final String word2,
+            final HashMap<String, ArrayList<String>> addedWords) {
+        addWord(word1, addedWords);
+        addWord(word2, addedWords);
+        addedWords.get(word1).add(word2);
+    }
+
+    /** Adds the unigram word1 and the bigram to bigramList, and records both in addedWords. */
+    private void addBigramToBigramList(final String word1, final String word2,
+            final HashMap<String, ArrayList<String>> addedWords,
+            final UserHistoryDictionaryBigramList bigramList) {
+        bigramList.addBigram(null, word1);
+        bigramList.addBigram(word1, word2);
+
+        addBigram(word1, word2, addedWords);
+    }
+
+    /** Asserts that word is a terminal of dict and has all the expected bigrams. */
+    private void checkWordInFusionDict(final FusionDictionary dict, final String word,
+            final ArrayList<String> expectedBigrams) {
+        final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
+        assertNotNull(group);
+        assertTrue(group.isTerminal());
+
+        for (final String bigram : expectedBigrams) {
+            assertNotNull(group.getBigram(bigram));
+        }
+    }
+
+    /** Checks every expected word, along with its bigrams, against dict. */
+    private void checkWordsInFusionDict(final FusionDictionary dict,
+            final HashMap<String, ArrayList<String>> bigrams) {
+        for (final String word : bigrams.keySet()) {
+            checkWordInFusionDict(dict, word, bigrams.get(word));
+        }
+    }
+
+    /** Asserts that word is a unigram of bigramList with exactly the expected bigrams. */
+    private void checkWordInBigramList(
+            final UserHistoryDictionaryBigramList bigramList, final String word,
+            final ArrayList<String> expectedBigrams) {
+        // check unigram
+        final HashMap<String, Byte> unigramMap = bigramList.getBigrams(null);
+        assertTrue(unigramMap.containsKey(word));
+
+        // check bigrams
+        final ArrayList<String> actualBigrams = new ArrayList<String>(
+                bigramList.getBigrams(word).keySet());
+
+        Collections.sort(expectedBigrams);
+        Collections.sort(actualBigrams);
+        assertEquals(expectedBigrams, actualBigrams);
+    }
+
+    /** Checks every expected word, along with its bigrams, against bigramList. */
+    private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList,
+            final HashMap<String, ArrayList<String>> addedWords) {
+        for (final String word : addedWords.keySet()) {
+            checkWordInBigramList(bigramList, word, addedWords.get(word));
+        }
+    }
+
+    /** Writes bigramList to file, passing 2 as the format version, and logs any IO error. */
+    private void writeDictToFile(final File file,
+            final UserHistoryDictionaryBigramList bigramList) {
+        FileOutputStream out = null;
+        try {
+            out = new FileOutputStream(file);
+            UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
+            out.flush();
+        } catch (IOException e) {
+            Log.e(TAG, "IO exception while writing file: " + e);
+        } finally {
+            // Close the stream even if writing failed, so that the descriptor is not leaked.
+            if (out != null) {
+                try {
+                    out.close();
+                } catch (IOException e) {
+                    Log.e(TAG, "IO exception while closing file: " + e);
+                }
+            }
+        }
+    }
+
+    /** Reads the whole file into memory and reports its contents to listener. */
+    private void readDictFromFile(final File file, final OnAddWordListener listener) {
+        FileInputStream inStream = null;
+
+        try {
+            inStream = new FileInputStream(file);
+            final byte[] buffer = new byte[(int)file.length()];
+            // A single read() may return fewer bytes than requested, so keep reading until
+            // the buffer is full or the end of the file is reached.
+            int offset = 0;
+            while (offset < buffer.length) {
+                final int count = inStream.read(buffer, offset, buffer.length - offset);
+                if (count < 0) break;
+                offset += count;
+            }
+
+            UserHistoryDictIOUtils.readDictionaryBinary(
+                    new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener);
+        } catch (FileNotFoundException e) {
+            Log.e(TAG, "file not found: " + e);
+        } catch (IOException e) {
+            Log.e(TAG, "IOException: " + e);
+        } finally {
+            if (inStream != null) {
+                try {
+                    inStream.close();
+                } catch (IOException e) {
+                    // do nothing
+                }
+            }
+        }
+    }
+
+    /** Checks that the dictionary built from a list contains all of its words and bigrams. */
+    public void testGenerateFusionDictionary() {
+        final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();
+
+        final HashMap<String, ArrayList<String>> addedWords =
+                new HashMap<String, ArrayList<String>>();
+        addBigramToBigramList("this", "is", addedWords, originalList);
+        addBigramToBigramList("this", "was", addedWords, originalList);
+        addBigramToBigramList("hello", "world", addedWords, originalList);
+
+        final FusionDictionary fusionDict =
+                UserHistoryDictIOUtils.constructFusionDictionary(this, originalList);
+
+        checkWordsInFusionDict(fusionDict, addedWords);
+    }
+
+    /** Checks that a list survives being written to a file and read back, twice. */
+    public void testReadAndWrite() {
+        File file = null;
+        try {
+            file = File.createTempFile("testReadAndWrite", ".dict");
+        } catch (IOException e) {
+            Log.d(TAG, "IOException while creating a temporary file: " + e);
+        }
+        assertNotNull(file);
+
+        // make original dictionary
+        final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();
+        final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap();
+        addBigramToBigramList("this", "is", addedWords, originalList);
+        addBigramToBigramList("this", "was", addedWords, originalList);
+        addBigramToBigramList("is", "not", addedWords, originalList);
+        addBigramToBigramList("hello", "world", addedWords, originalList);
+
+        // write to file
+        writeDictToFile(file, originalList);
+
+        // make result dict.
+        final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
+        final OnAddWordListener listener = new OnAddWordListener() {
+            @Override
+            public void setUnigram(final String word,
+                    final String shortcutTarget, final int frequency) {
+                Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
+                resultList.addBigram(null, word, (byte)frequency);
+            }
+            @Override
+            public void setBigram(final String word1, final String word2, final int frequency) {
+                Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
+                resultList.addBigram(word1, word2, (byte)frequency);
+            }
+        };
+
+        // load from file
+        readDictFromFile(file, listener);
+        checkWordsInBigramList(resultList, addedWords);
+
+        // add new bigram
+        addBigramToBigramList("hello", "java", addedWords, resultList);
+
+        // rewrite
+        writeDictToFile(file, resultList);
+        final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
+        final OnAddWordListener listener2 = new OnAddWordListener() {
+            @Override
+            public void setUnigram(final String word,
+                    final String shortcutTarget, final int frequency) {
+                Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
+                resultList2.addBigram(null, word, (byte)frequency);
+            }
+            @Override
+            public void setBigram(final String word1, final String word2, final int frequency) {
+                Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
+                resultList2.addBigram(word1, word2, (byte)frequency);
+            }
+        };
+
+        // load from file
+        readDictFromFile(file, listener2);
+        checkWordsInBigramList(resultList2, addedWords);
+    }
+}