- separate dict (uses xml)

- retrieve only bigrams that start with the typed character or one of its neighboring keys
- contacts bigram
- performance measure

bug: 2873133

Change-Id: If97c005b18c82f3fafef50009dd2dfd972b0ab8f
This commit is contained in:
Jae Yong Sung 2010-07-26 11:43:29 -07:00
parent 679b838b05
commit 80aa14fd43
17 changed files with 884 additions and 304 deletions

View file

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
/*
**
** Copyright 2010, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
-->
<!--
  Main dictionary split into several parts. Each part element names one piece;
  presumably the name attribute is resolved to a raw resource of the same name
  and the pieces are merged in the order listed here (TODO confirm against the
  code that parses this file).
-->
<dictionary>
<part name = "main0" />
<part name = "main1" />
<part name = "main2" />
</dictionary>

View file

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
/*
**
** Copyright 2010, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
-->
<!--
  Main dictionary stored as a single part. Presumably the name attribute is
  resolved to a raw resource of the same name (TODO confirm against the code
  that parses this file).
-->
<dictionary>
<part name = "main" />
</dictionary>

View file

@ -16,6 +16,7 @@
package com.android.inputmethod.latin;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -31,6 +32,14 @@ import android.util.Log;
*/
public class BinaryDictionary extends Dictionary {
/**
* The Java and native code can handle different maximum word lengths.
* This value should only be used in BinaryDictionary.java.
* It must be kept at this value because some languages, e.g. German, have
* really long words.
*/
protected static final int MAX_WORD_LENGTH = 48;
private static final String TAG = "BinaryDictionary";
private static final int MAX_ALTERNATIVES = 16;
private static final int MAX_WORDS = 16;
@ -64,8 +73,8 @@ public class BinaryDictionary extends Dictionary {
* @param context application context for reading resources
* @param resId the resource containing the raw binary dictionary
*/
public BinaryDictionary(Context context, int resId, int dicTypeId) {
if (resId != 0) {
public BinaryDictionary(Context context, int[] resId, int dicTypeId) {
if (resId != null && resId.length > 0 && resId[0] != 0) {
loadDictionary(context, resId);
}
mDicTypeId = dicTypeId;
@ -97,47 +106,68 @@ public class BinaryDictionary extends Dictionary {
private native void closeNative(int dict);
private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize,
char[] outputChars, int[] frequencies,
int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
int[] nextLettersFrequencies, int nextLettersSize);
private native int getBigramsNative(int nativeData, char[] prevWord, int prevWordLength,
char[] outputChars, int[] frequencies, int maxWordLength, int maxBigrams);
char[] outputChars, int[] frequencies, int maxWordLength, int maxWords,
int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize);
private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
int maxWordLength, int maxBigrams, int maxAlternatives);
private final void loadDictionary(Context context, int resId) {
InputStream is = context.getResources().openRawResource(resId);
private final void loadDictionary(Context context, int[] resId) {
InputStream[] is = null;
try {
int avail = is.available();
// merging separated dictionary into one if dictionary is separated
int total = 0;
is = new InputStream[resId.length];
for (int i = 0; i < resId.length; i++) {
is[i] = context.getResources().openRawResource(resId[i]);
total += is[i].available();
}
mNativeDictDirectBuffer =
ByteBuffer.allocateDirect(avail).order(ByteOrder.nativeOrder());
int got = Channels.newChannel(is).read(mNativeDictDirectBuffer);
if (got != avail) {
Log.e(TAG, "Read " + got + " bytes, expected " + avail);
ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
int got = 0;
for (int i = 0; i < resId.length; i++) {
got += Channels.newChannel(is[i]).read(mNativeDictDirectBuffer);
}
if (got != total) {
Log.e(TAG, "Read " + got + " bytes, expected " + total);
} else {
mNativeDict = openNative(mNativeDictDirectBuffer,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
mDictLength = avail;
mDictLength = total;
}
} catch (IOException e) {
Log.w(TAG, "No available size for binary dictionary");
Log.w(TAG, "No available memory for binary dictionary");
} finally {
try {
is.close();
for (int i = 0;i < is.length; i++) {
is[i].close();
}
} catch (IOException e) {
Log.w(TAG, "Failed to close input stream");
}
}
}
@Override
public void getBigrams(final WordComposer composer, final CharSequence previousWord,
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
final WordCallback callback, int[] nextLettersFrequencies) {
char[] chars = previousWord.toString().toCharArray();
Arrays.fill(mOutputChars_bigrams, (char) 0);
Arrays.fill(mFrequencies_bigrams, 0);
int count = getBigramsNative(mNativeDict, chars, chars.length, mOutputChars_bigrams,
mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS);
int codesSize = codes.size();
Arrays.fill(mInputCodes, -1);
int[] alternatives = codes.getCodesAt(0);
System.arraycopy(alternatives, 0, mInputCodes, 0,
Math.min(alternatives.length, MAX_ALTERNATIVES));
int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
mOutputChars_bigrams, mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS,
MAX_ALTERNATIVES);
for (int j = 0; j < count; j++) {
if (mFrequencies_bigrams[j] < 1) break;
int start = j * MAX_WORD_LENGTH;
@ -156,7 +186,7 @@ public class BinaryDictionary extends Dictionary {
public void getWords(final WordComposer codes, final WordCallback callback,
int[] nextLettersFrequencies) {
final int codesSize = codes.size();
// Wont deal with really long words.
// Won't deal with really long words.
if (codesSize > MAX_WORD_LENGTH - 1) return;
Arrays.fill(mInputCodes, -1);

View file

@ -22,6 +22,8 @@ import android.database.ContentObserver;
import android.database.Cursor;
import android.os.SystemClock;
import android.provider.ContactsContract.Contacts;
import android.text.TextUtils;
import android.util.Log;
public class ContactsDictionary extends ExpandableDictionary {
@ -30,6 +32,12 @@ public class ContactsDictionary extends ExpandableDictionary {
Contacts.DISPLAY_NAME,
};
/**
* Frequencies assigned to contact names (unigrams) and to pairs of consecutive
* contact-name words (bigrams) when they are added to the dictionary
*/
private static final int FREQUENCY_FOR_CONTACTS = 128;
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
private static final int INDEX_NAME = 1;
private ContentObserver mObserver;
@ -90,6 +98,7 @@ public class ContactsDictionary extends ExpandableDictionary {
if (name != null) {
int len = name.length();
String prevWord = null;
// TODO: Better tokenization for non-Latin writing systems
for (int i = 0; i < len; i++) {
@ -113,7 +122,13 @@ public class ContactsDictionary extends ExpandableDictionary {
// capitalization of i.
final int wordLen = word.length();
if (wordLen < maxWordLength && wordLen > 1) {
super.addWord(word, 128);
super.addWord(word, FREQUENCY_FOR_CONTACTS);
if (!TextUtils.isEmpty(prevWord)) {
// TODO Do not add email address
super.addBigrams(prevWord, word,
FREQUENCY_FOR_CONTACTS_BIGRAM);
}
prevWord = word;
}
}
}

View file

@ -21,9 +21,6 @@ package com.android.inputmethod.latin;
* strokes.
*/
abstract public class Dictionary {
protected static final int MAX_WORD_LENGTH = 48;
/**
* Whether or not to replicate the typed word in the suggested list, even if it's valid.
*/

View file

@ -29,7 +29,7 @@ public class EditingUtil {
/**
* Number of characters we want to look back in order to identify the previous word
*/
public static final int LOOKBACK_CHARACTER_NUM = 15;
private static final int LOOKBACK_CHARACTER_NUM = 15;
private EditingUtil() {};
@ -185,10 +185,22 @@ public class EditingUtil {
private static final Pattern spaceRegex = Pattern.compile("\\s+");
public static CharSequence getPreviousWord(InputConnection connection) {
public static CharSequence getPreviousWord(InputConnection connection,
String sentenceSeperators) {
//TODO: Should fix this. This could be slow!
CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
if (prev == null) {
return null;
}
String[] w = spaceRegex.split(prev);
return (w.length >= 2) ? w[w.length-2] : null;
if (w.length >= 2 && w[w.length-2].length() > 0) {
char lastChar = w[w.length-2].charAt(w[w.length-2].length() -1);
if (sentenceSeperators.contains(String.valueOf(lastChar))) {
return null;
}
return w[w.length-2];
} else {
return null;
}
}
}

View file

@ -16,22 +16,32 @@
package com.android.inputmethod.latin;
import java.util.LinkedList;
import android.content.Context;
import android.os.AsyncTask;
import android.os.SystemClock;
import android.util.Log;
/**
* Base class for an in-memory dictionary that can grow dynamically and can
* be searched for suggestions and valid words.
*/
public class ExpandableDictionary extends Dictionary {
/**
* The Java and native code can handle different maximum word lengths.
* This class uses 32 because the Java stack overflows when a greater value is used.
*/
protected static final int MAX_WORD_LENGTH = 32;
private Context mContext;
private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
private int mDicTypeId;
private int mMaxDepth;
private int mInputLength;
private int[] mNextLettersFrequencies;
private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
public static final int MAX_WORD_LENGTH = 32;
private static final char QUOTE = '\'';
private boolean mRequiresReload;
@ -45,7 +55,9 @@ public class ExpandableDictionary extends Dictionary {
char code;
int frequency;
boolean terminal;
Node parent;
NodeArray children;
LinkedList<NextWord> ngrams; // Supports ngram
}
static class NodeArray {
@ -69,6 +81,18 @@ public class ExpandableDictionary extends Dictionary {
}
}
/**
 * One entry in a node's n-gram list: a following word together with the
 * frequency recorded for that word pair.
 */
static class NextWord {
    /** Frequency recorded for the pair ending in {@link #word}. */
    int frequency;
    /** Trie node identifying the following word. */
    Node word;
    /** Not assigned by the constructor; remains null until set by other code. */
    NextWord nextWord;

    NextWord(Node word, int frequency) {
        this.frequency = frequency;
        this.word = word;
    }
}
private NodeArray mRoots;
private int[][] mCodes;
@ -117,12 +141,11 @@ public class ExpandableDictionary extends Dictionary {
}
public void addWord(String word, int frequency) {
addWordRec(mRoots, word, 0, frequency);
addWordRec(mRoots, word, 0, frequency, null);
}
private void addWordRec(NodeArray children, final String word,
final int depth, final int frequency) {
private void addWordRec(NodeArray children, final String word, final int depth,
final int frequency, Node parentNode) {
final int wordLength = word.length();
final char c = word.charAt(depth);
// Does children have the current character?
@ -139,6 +162,7 @@ public class ExpandableDictionary extends Dictionary {
if (!found) {
childNode = new Node();
childNode.code = c;
childNode.parent = parentNode;
children.add(childNode);
}
if (wordLength == depth + 1) {
@ -151,7 +175,7 @@ public class ExpandableDictionary extends Dictionary {
if (childNode.children == null) {
childNode.children = new NodeArray();
}
addWordRec(childNode.children, word, depth + 1, frequency);
addWordRec(childNode.children, word, depth + 1, frequency, childNode);
}
@Override
@ -185,7 +209,7 @@ public class ExpandableDictionary extends Dictionary {
if (mRequiresReload) startDictionaryLoadingTaskLocked();
if (mUpdatingDictionary) return false;
}
final int freq = getWordFrequencyRec(mRoots, word, 0, word.length());
final int freq = getWordFrequency(word);
return freq > -1;
}
@ -193,32 +217,8 @@ public class ExpandableDictionary extends Dictionary {
* Returns the word's frequency or -1 if not found
*/
public int getWordFrequency(CharSequence word) {
return getWordFrequencyRec(mRoots, word, 0, word.length());
}
/**
* Returns the word's frequency or -1 if not found
*/
private int getWordFrequencyRec(final NodeArray children, final CharSequence word,
final int offset, final int length) {
final int count = children.length;
char currentChar = word.charAt(offset);
for (int j = 0; j < count; j++) {
final Node node = children.data[j];
if (node.code == currentChar) {
if (offset == length - 1) {
if (node.terminal) {
return node.frequency;
}
} else {
if (node.children != null) {
int freq = getWordFrequencyRec(node.children, word, offset + 1, length);
if (freq > -1) return freq;
}
}
}
}
return -1;
Node node = searchNode(mRoots, word, 0, word.length());
return (node == null) ? -1 : node.frequency;
}
/**
@ -325,6 +325,133 @@ public class ExpandableDictionary extends Dictionary {
}
}
/**
 * Records in the in-memory trie that {@code word2} follows {@code word1}.
 * Both words are looked up with {@code searchWord}, which creates any missing
 * trie nodes, so neither word has to be present beforehand.
 *
 * @param word1 the preceding word
 * @param word2 the following word
 * @param addFrequency amount to add to the pair's frequency; also the initial
 *        frequency when the pair is new
 * @return the pair's frequency after the update
 */
protected int addBigrams(String word1, String word2, int addFrequency) {
    final Node first = searchWord(mRoots, word1, 0, null);
    final Node second = searchWord(mRoots, word2, 0, null);

    if (first.ngrams == null || first.ngrams.size() == 0) {
        // No followers recorded yet: start with a fresh list.
        first.ngrams = new LinkedList<NextWord>();
    } else {
        // Pair may already exist; if so just bump its frequency.
        for (NextWord existing : first.ngrams) {
            if (existing.word == second) {
                existing.frequency += addFrequency;
                return existing.frequency;
            }
        }
    }

    first.ngrams.add(new NextWord(second, addFrequency));
    return addFrequency;
}
/**
 * Walks the trie along {@code word} starting at {@code depth}, creating any
 * node that does not exist yet, and marks the last node as terminal.
 *
 * @param children the node array to start searching in
 * @param word the word to find or insert
 * @param depth index of the character in {@code word} matched against {@code children}
 * @param parentNode the node owning {@code children}, or null at the root
 * @return the terminal node of the word
 */
private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
    final int wordLength = word.length();
    NodeArray level = children;
    Node parent = parentNode;
    int index = depth;
    while (true) {
        final char c = word.charAt(index);
        // Look for an existing child carrying the current character.
        Node match = null;
        final int levelSize = level.length;
        for (int i = 0; i < levelSize; i++) {
            final Node candidate = level.data[i];
            if (candidate.code == c) {
                match = candidate;
                break;
            }
        }
        if (match == null) {
            match = new Node();
            match.code = c;
            match.parent = parent;
            level.add(match);
        }
        if (wordLength == index + 1) {
            // Terminate this word
            match.terminal = true;
            return match;
        }
        if (match.children == null) {
            match.children = new NodeArray();
        }
        // Descend one level and continue with the next character.
        level = match.children;
        parent = match;
        index++;
    }
}
/**
 * Reports, through {@code callback}, every word recorded as following
 * {@code previousWord}. Nothing is reported while the dictionary is being
 * updated or when {@code previousWord} is not a terminal word in the trie.
 */
@Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
        final WordCallback callback, int[] nextLettersFrequencies) {
    synchronized (mUpdatingLock) {
        // If we need to update, start off a background task
        if (mRequiresReload) {
            startDictionaryLoadingTaskLocked();
        }
        // Currently updating contacts, don't return any results.
        if (mUpdatingDictionary) {
            return;
        }
    }

    final Node previous = searchNode(mRoots, previousWord, 0, previousWord.length());
    if (previous == null || previous.ngrams == null) {
        return;
    }
    reverseLookUp(previous.ngrams, callback);
}
/**
 * Rebuilds the full word for each entry by following parent links from the
 * entry's node up to the root, then reports the word and the entry's
 * frequency through {@code callback} as a bigram.
 *
 * @param terminalNodes list of entries whose words should be reported
 * @param callback receiver of the reconstructed words
 */
private void reverseLookUp(LinkedList<NextWord> terminalNodes,
        final WordCallback callback) {
    for (NextWord entry : terminalNodes) {
        final int freq = entry.frequency;
        // Characters are collected leaf-to-root, so prepend each one.
        sb.setLength(0);
        Node cursor = entry.word;
        do {
            sb.insert(0, cursor.code);
            cursor = cursor.parent;
        } while (cursor != null);
        // TODO better way to feed char array?
        final char[] wordChars = sb.toString().toCharArray();
        callback.addWord(wordChars, 0, sb.length(), freq, mDicTypeId, DataType.BIGRAM);
    }
}
/**
 * Looks up the terminal node of {@code word} without modifying the trie.
 *
 * @param children the node array to search at this level
 * @param word the word to look up
 * @param offset index of the character in {@code word} matched at this level
 * @param length number of characters of {@code word} to match
 * @return the terminal node of the word, or null if the word is not present
 */
private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
        final int length) {
    // TODO Consider combining with addWordRec
    final int childCount = children.length;
    final char target = word.charAt(offset);
    final boolean atLastChar = (offset == length - 1);
    for (int i = 0; i < childCount; i++) {
        final Node candidate = children.data[i];
        if (candidate.code != target) {
            continue;
        }
        if (atLastChar) {
            // Only a node flagged terminal counts as a complete word.
            if (candidate.terminal) {
                return candidate;
            }
            continue;
        }
        if (candidate.children == null) {
            continue;
        }
        final Node deeper = searchNode(candidate.children, word, offset + 1, length);
        if (deeper != null) {
            return deeper;
        }
    }
    return null;
}
protected void clearDictionary() {
mRoots = new NodeArray();
}

View file

@ -99,7 +99,10 @@ public class InputLanguageSelection extends PreferenceActivity {
boolean haveDictionary = false;
conf.locale = locale;
res.updateConfiguration(conf, res.getDisplayMetrics());
BinaryDictionary bd = new BinaryDictionary(this, R.raw.main, Suggest.DIC_MAIN);
int[] dictionaries = LatinIME.getDictionary(res, this.getPackageName());
BinaryDictionary bd = new BinaryDictionary(this, dictionaries, Suggest.DIC_MAIN);
// Is the dictionary larger than a placeholder? Arbitrarily chose a lower limit of
// 4000-5000 words, whereas the LARGE_DICTIONARY is about 20000+ words.
if (bd.getSize() > Suggest.LARGE_DICTIONARY_THRESHOLD / 4) {

View file

@ -20,6 +20,8 @@ import com.android.inputmethod.voice.FieldContext;
import com.android.inputmethod.voice.SettingsUtil;
import com.android.inputmethod.voice.VoiceInput;
import org.xmlpull.v1.XmlPullParserException;
import android.app.AlertDialog;
import android.content.BroadcastReceiver;
import android.content.Context;
@ -29,6 +31,7 @@ import android.content.IntentFilter;
import android.content.SharedPreferences;
import android.content.res.Configuration;
import android.content.res.Resources;
import android.content.res.XmlResourceParser;
import android.inputmethodservice.InputMethodService;
import android.inputmethodservice.Keyboard;
import android.media.AudioManager;
@ -60,6 +63,7 @@ import android.view.inputmethod.InputConnection;
import android.view.inputmethod.InputMethodManager;
import java.io.FileDescriptor;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
@ -159,6 +163,8 @@ public class LatinIME extends InputMethodService
KeyboardSwitcher mKeyboardSwitcher;
private UserDictionary mUserDictionary;
// User Bigram is disabled for now
//private UserBigramDictionary mUserBigramDictionary;
private ContactsDictionary mContactsDictionary;
private AutoDictionary mAutoDictionary;
@ -383,6 +389,45 @@ public class LatinIME extends InputMethodService
prefs.registerOnSharedPreferenceChangeListener(this);
}
/**
 * Reads the {@code R.xml.dictionary} resource and resolves every
 * {@code <part name="..."/>} element to the raw resource of that name, so a
 * dictionary may be shipped either as one file or as several separate parts.
 *
 * @param res resources used to read the XML and to look up the raw resource ids
 * @param packageName package in which the raw resources are looked up
 * @return the raw resource ids in document order; parts listed before a parse
 *         or I/O failure are still returned
 */
static int[] getDictionary(Resources res, String packageName) {
    final ArrayList<Integer> dictionaries = new ArrayList<Integer>();
    final XmlResourceParser xrp = res.getXml(R.xml.dictionary);
    try {
        int current = xrp.getEventType();
        while (current != XmlResourceParser.END_DOCUMENT) {
            if (current == XmlResourceParser.START_TAG && "part".equals(xrp.getName())) {
                String dictFileName = xrp.getAttributeValue(null, "name");
                dictionaries.add(res.getIdentifier(dictFileName, "raw", packageName));
            }
            xrp.next();
            current = xrp.getEventType();
        }
    } catch (XmlPullParserException e) {
        Log.e(TAG, "Dictionary XML parsing failure", e);
    } catch (IOException e) {
        Log.e(TAG, "Dictionary XML IOException", e);
    } finally {
        // Release the parser whether or not parsing succeeded.
        xrp.close();
    }

    final int count = dictionaries.size();
    final int[] dict = new int[count];
    for (int i = 0; i < count; i++) {
        dict[i] = dictionaries.get(i);
    }
    return dict;
}
private void initSuggest(String locale) {
mInputLocale = locale;
@ -396,7 +441,9 @@ public class LatinIME extends InputMethodService
}
SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(this);
mQuickFixes = sp.getBoolean(PREF_QUICK_FIXES, true);
mSuggest = new Suggest(this, R.raw.main);
int[] dictionaries = getDictionary(orig, this.getPackageName());
mSuggest = new Suggest(this, dictionaries);
updateAutoTextEnabled(saveLocale);
if (mUserDictionary != null) mUserDictionary.close();
mUserDictionary = new UserDictionary(this, mInputLocale);
@ -407,6 +454,15 @@ public class LatinIME extends InputMethodService
mAutoDictionary.close();
}
mAutoDictionary = new AutoDictionary(this, this, mInputLocale, Suggest.DIC_AUTO);
// User Bigram is disabled for now
/*
if (mUserBigramDictionary != null) {
mUserBigramDictionary.close();
}
mUserBigramDictionary = new UserBigramDictionary(this, this, mInputLocale,
Suggest.DIC_USERBIGRAM);
mSuggest.setUserBigramDictionary(mUserBigramDictionary);
*/
mSuggest.setUserDictionary(mUserDictionary);
mSuggest.setContactsDictionary(mContactsDictionary);
mSuggest.setAutoDictionary(mAutoDictionary);
@ -642,6 +698,8 @@ public class LatinIME extends InputMethodService
mKeyboardSwitcher.getInputView().closing();
}
if (mAutoDictionary != null) mAutoDictionary.flushPendingWrites();
// User Bigram is disabled for now
//if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites();
}
@Override
@ -897,7 +955,7 @@ public class LatinIME extends InputMethodService
}
mCommittedLength = mComposing.length();
TextEntryState.acceptedTyped(mComposing);
checkAddToDictionary(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
addToDictionaries(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
}
updateSuggestions();
}
@ -1583,9 +1641,10 @@ public class LatinIME extends InputMethodService
private void showSuggestions(WordComposer word) {
//long startTime = System.currentTimeMillis(); // TIME MEASUREMENT!
// TODO Maybe need better way of retrieving previous word
CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection(),
mWordSeparators);
List<CharSequence> stringList = mSuggest.getSuggestions(
mKeyboardSwitcher.getInputView(), word, false, prevWord);
mKeyboardSwitcher.getInputView(), word, false, prevWord);
//long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT!
//Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime));
@ -1601,7 +1660,8 @@ public class LatinIME extends InputMethodService
boolean typedWordValid = mSuggest.isValidWord(typedWord) ||
(preferCapitalization()
&& mSuggest.isValidWord(typedWord.toString().toLowerCase()));
if (mCorrectionMode == Suggest.CORRECTION_FULL) {
if (mCorrectionMode == Suggest.CORRECTION_FULL
|| mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM) {
correctionAvailable |= typedWordValid;
}
// Don't auto-correct words with multiple capital letter
@ -1637,8 +1697,9 @@ public class LatinIME extends InputMethodService
mJustAccepted = true;
pickSuggestion(mBestWord, false);
// Add the word to the auto dictionary if it's not a known word
checkAddToDictionary(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
addToDictionaries(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
return true;
}
return false;
}
@ -1692,7 +1753,9 @@ public class LatinIME extends InputMethodService
pickSuggestion(suggestion, correcting);
// Add the word to the auto dictionary if it's not a known word
if (index == 0) {
checkAddToDictionary(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
addToDictionaries(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
} else {
addToBigramDictionary(suggestion, 1);
}
LatinImeLogger.logOnManualSuggestion(mComposing.toString(), suggestion.toString(),
index, suggestions);
@ -1892,16 +1955,43 @@ public class LatinIME extends InputMethodService
ic.setSelection(mLastSelectionStart, mLastSelectionStart);
}
private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta) {
private void addToDictionaries(CharSequence suggestion, int frequencyDelta) {
checkAddToDictionary(suggestion, frequencyDelta, false);
}
private void addToBigramDictionary(CharSequence suggestion, int frequencyDelta) {
checkAddToDictionary(suggestion, frequencyDelta, true);
}
/**
* Adds to the UserBigramDictionary and/or AutoDictionary
* @param addToBigramDictionary true if it should be added to bigram dictionary if possible
*/
private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta,
boolean addToBigramDictionary) {
if (suggestion == null || suggestion.length() < 1) return;
// Only auto-add to dictionary if auto-correct is ON. Otherwise we'll be
// adding words in situations where the user or application really didn't
// want corrections enabled or learned.
if (!(mCorrectionMode == Suggest.CORRECTION_FULL)) return;
if (suggestion != null && mAutoDictionary.isValidWord(suggestion)
|| (!mSuggest.isValidWord(suggestion.toString())
if (!(mCorrectionMode == Suggest.CORRECTION_FULL
|| mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM)) {
return;
}
if (suggestion != null) {
if (!addToBigramDictionary && mAutoDictionary.isValidWord(suggestion)
|| (!mSuggest.isValidWord(suggestion.toString())
&& !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) {
mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
}
// User Bigram is disabled for now
/*
if (mUserBigramDictionary != null) {
CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
if (!TextUtils.isEmpty(prevWord)) {
mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString(), 1);
}
}
*/
}
}

View file

@ -34,6 +34,10 @@ import android.view.View;
*/
public class Suggest implements Dictionary.WordCallback {
private static final String TAG = "Suggest";
public static final int APPROX_MAX_WORD_LENGTH = 32;
public static final int CORRECTION_NONE = 0;
public static final int CORRECTION_BASIC = 1;
public static final int CORRECTION_FULL = 2;
@ -71,6 +75,8 @@ public class Suggest implements Dictionary.WordCallback {
private Dictionary mContactsDictionary;
private Dictionary mUserBigramDictionary;
private int mPrefMaxSuggestions = 12;
private int mPrefMaxBigrams = 255;
@ -95,7 +101,7 @@ public class Suggest implements Dictionary.WordCallback {
private int mCorrectionMode = CORRECTION_BASIC;
public Suggest(Context context, int dictionaryResId) {
public Suggest(Context context, int[] dictionaryResId) {
mMainDict = new BinaryDictionary(context, dictionaryResId, DIC_MAIN);
initPool();
}
@ -107,7 +113,7 @@ public class Suggest implements Dictionary.WordCallback {
private void initPool() {
for (int i = 0; i < mPrefMaxSuggestions; i++) {
StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
mStringPool.add(sb);
}
}
@ -128,6 +134,10 @@ public class Suggest implements Dictionary.WordCallback {
return mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
}
/**
 * Returns the approximate maximum word length, used in this class as the
 * initial capacity of the StringBuilders that hold suggestions.
 */
public int getApproxMaxWordLength() {
    final int approxLength = APPROX_MAX_WORD_LENGTH;
    return approxLength;
}
/**
* Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
* before the main dictionary, if set.
@ -147,6 +157,10 @@ public class Suggest implements Dictionary.WordCallback {
mAutoDictionary = autoDictionary;
}
/**
 * Sets the optional user bigram dictionary. When non-null it is consulted
 * for bigram lookups in this class.
 *
 * @param userBigramDictionary the dictionary to use, or null for none
 */
public void setUserBigramDictionary(Dictionary userBigramDictionary) {
    this.mUserBigramDictionary = userBigramDictionary;
}
/**
* Number of suggestions to generate from the input key sequence. This has
* to be a number between 1 and 100 (inclusive).
@ -162,7 +176,7 @@ public class Suggest implements Dictionary.WordCallback {
mBigramPriorities = new int[mPrefMaxBigrams];
collectGarbage(mSuggestions, mPrefMaxSuggestions);
while (mStringPool.size() < mPrefMaxSuggestions) {
StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
mStringPool.add(sb);
}
}
@ -224,10 +238,9 @@ public class Suggest implements Dictionary.WordCallback {
mLowerOriginalWord = "";
}
// Search the dictionary only if there are at least 2 characters
if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM
|| mCorrectionMode == CORRECTION_BASIC)) {
// At first character, just get the bigrams
// At first character typed, search only the bigrams
Arrays.fill(mBigramPriorities, 0);
collectGarbage(mBigramSuggestions, mPrefMaxBigrams);
@ -236,17 +249,29 @@ public class Suggest implements Dictionary.WordCallback {
if (mMainDict.isValidWord(lowerPrevWord)) {
prevWordForBigram = lowerPrevWord;
}
mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
mNextLettersFrequencies);
if (mUserBigramDictionary != null) {
mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this,
mNextLettersFrequencies);
}
if (mContactsDictionary != null) {
mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this,
mNextLettersFrequencies);
}
if (mMainDict != null) {
mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
mNextLettersFrequencies);
}
char currentChar = wordComposer.getTypedWord().charAt(0);
char currentCharUpper = Character.toUpperCase(currentChar);
int count = 0;
int bigramSuggestionSize = mBigramSuggestions.size();
for (int i = 0; i < bigramSuggestionSize; i++) {
if (mBigramSuggestions.get(i).charAt(0) == currentChar) {
if (mBigramSuggestions.get(i).charAt(0) == currentChar
|| mBigramSuggestions.get(i).charAt(0) == currentCharUpper) {
int poolSize = mStringPool.size();
StringBuilder sb = poolSize > 0 ?
(StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(Dictionary.MAX_WORD_LENGTH);
: new StringBuilder(getApproxMaxWordLength());
sb.setLength(0);
sb.append(mBigramSuggestions.get(i));
mSuggestions.add(count++, sb);
@ -256,7 +281,7 @@ public class Suggest implements Dictionary.WordCallback {
}
} else if (wordComposer.size() > 1) {
// Search the dictionary only if there are at least 2 characters
// At second character typed, search the unigrams (scores being affected by bigrams)
if (mUserDictionary != null || mContactsDictionary != null) {
if (mUserDictionary != null) {
mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
@ -277,7 +302,6 @@ public class Suggest implements Dictionary.WordCallback {
mHaveCorrection = true;
}
}
if (mOriginalWord != null) {
mSuggestions.add(0, mOriginalWord.toString());
}
@ -290,7 +314,6 @@ public class Suggest implements Dictionary.WordCallback {
mHaveCorrection = false;
}
}
if (mAutoTextEnabled) {
int i = 0;
int max = 6;
@ -401,7 +424,7 @@ public class Suggest implements Dictionary.WordCallback {
/ MAXIMUM_BIGRAM_FREQUENCY)
* (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
+ BIGRAM_MULTIPLIER_MIN;
/* Log.d("Suggest","bigram num: " + bigramSuggestion
/* Log.d(TAG,"bigram num: " + bigramSuggestion
+ " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
+ " currentPriority: " + freq + " bigramPriority: "
+ mBigramPriorities[bigramSuggestion]
@ -430,7 +453,7 @@ public class Suggest implements Dictionary.WordCallback {
priorities[pos] = freq;
int poolSize = mStringPool.size();
StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(Dictionary.MAX_WORD_LENGTH);
: new StringBuilder(getApproxMaxWordLength());
sb.setLength(0);
if (mCapitalize) {
sb.append(Character.toUpperCase(word[offset]));

View file

@ -67,8 +67,9 @@ static int latinime_BinaryDictionary_getSuggestions(
int *nextLetters = nextLettersArray != NULL ? env->GetIntArrayElements(nextLettersArray, NULL)
: NULL;
int count = dictionary->getSuggestions(inputCodes, arraySize, (unsigned short*) outputChars, frequencies,
maxWordLength, maxWords, maxAlternatives, skipPos, nextLetters, nextLettersSize);
int count = dictionary->getSuggestions(inputCodes, arraySize, (unsigned short*) outputChars,
frequencies, maxWordLength, maxWords, maxAlternatives, skipPos, nextLetters,
nextLettersSize);
env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
@ -81,20 +82,24 @@ static int latinime_BinaryDictionary_getSuggestions(
}
static int latinime_BinaryDictionary_getBigrams
(JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength,
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams)
(JNIEnv *env, jobject object, jint dict, jcharArray prevWordArray, jint prevWordLength,
jintArray inputArray, jint inputArraySize, jcharArray outputArray,
jintArray frequencyArray, jint maxWordLength, jint maxBigrams, jint maxAlternatives)
{
Dictionary *dictionary = (Dictionary*) dict;
if (dictionary == NULL) return 0;
jchar *word = env->GetCharArrayElements(wordArray, NULL);
jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL);
int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
int count = dictionary->getBigrams((unsigned short*) word, wordLength,
(unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams,
maxAlternatives);
env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT);
env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
env->ReleaseCharArrayElements(outputArray, outputChars, 0);
env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
@ -130,7 +135,7 @@ static JNINativeMethod gMethods[] = {
{"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(I[II[C[IIIII[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
{"getBigramsNative", "(I[CI[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}
{"getBigramsNative", "(I[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
};
static int registerNativeMethods(JNIEnv* env, const char* className,

View file

@ -387,13 +387,17 @@ Dictionary::getBigramFreq(int *pos)
int
Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, unsigned short *bigramChars,
int *bigramFreq, int maxWordLength, int maxBigrams)
Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes, int codesSize,
unsigned short *bigramChars, int *bigramFreq, int maxWordLength, int maxBigrams,
int maxAlternatives)
{
mBigramFreq = bigramFreq;
mBigramChars = bigramChars;
mInputCodes = codes;
mInputLength = codesSize;
mMaxWordLength = maxWordLength;
mMaxBigrams = maxBigrams;
mMaxAlternatives = maxAlternatives;
if (mBigram == 1 && checkIfDictVersionIsLatest()) {
int pos = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
@ -406,7 +410,7 @@ Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, unsigned sh
int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
if (bigramExist > 0) {
int nextBigramExist = 1;
while (nextBigramExist > 0) {
while (nextBigramExist > 0 && bigramCount < maxBigrams) {
int bigramAddress = getBigramAddress(&pos, true);
int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]);
// search for all bigrams and store them
@ -521,8 +525,27 @@ Dictionary::searchForTerminalNode(int addressLookingFor, int frequency)
break;
}
}
if (checkFirstCharacter(word)) {
addWordBigram(word, depth, frequency);
}
}
addWordBigram(word, depth, frequency);
bool
Dictionary::checkFirstCharacter(unsigned short *word)
{
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
int *inputCodes = mInputCodes;
int maxAlt = mMaxAlternatives;
while (maxAlt > 0) {
if ((unsigned int) *inputCodes == (unsigned int) *word) {
return true;
}
inputCodes++;
maxAlt--;
}
return false;
}
bool

View file

@ -39,8 +39,9 @@ public:
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
int *nextLetters, int nextLettersSize);
int getBigrams(unsigned short *word, int length, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxBigrams);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
bool isValidWord(unsigned short *word, int length);
void setAsset(void *asset) { mAsset = asset; }
void *getAsset() { return mAsset; }
@ -64,6 +65,7 @@ private:
int wideStrLen(unsigned short *str);
bool sameAsTyped(unsigned short *word, int length);
bool checkFirstCharacter(unsigned short *word);
bool addWord(unsigned short *word, int length, int frequency);
bool addWordBigram(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);

View file

@ -0,0 +1,24 @@
This text is used as test text for measuring performance of dictionary prediction. Any text can be put into this file to test the performance (total keystroke savings).
When you think about “information,” what probably comes to mind are streams of words and numbers. Google's pretty good at organizing these types of information, but consider all the things you can't express with words: what does it look like in the middle of a sandstorm? What are some great examples of Art Nouveau architecture? Should I consider wedding cupcakes instead of a traditional cake?
This is why we built Google Images in 2001. We realized that for many searches, the best answer wasn't text—it was an image or a set of images. The service has grown quite a bit since then. In 2001, we indexed around 250 million images. By 2005, we had indexed over 1 billion. And today, we have an index of over 10 billion images.
It's not just about quantity, though. Over the past decade we've been baking deep computer science into Google Images to make it even faster and easier for you to find precisely the right images. We not only find images for pretty much anything you type in; we can also instantly pull out images of clip art, line drawings, faces and even colors.
There's even more sophisticated computer vision technology powering our “Similar images” tool. For example, did you know there are nine subspecies of leopards, each with a distinct pattern of spots? Google Images can recognize the difference, returning just leopards of a particular subspecies. It can tell you the name of the subspecies in a particular image—even if that image isn't labeled—because other similar leopard images on the web are labeled with that subspecies's name.
And our “Similar colors” refinement doesn't just return images based on the overall color of an image. If it did, lots of images would simply be classified as “white.” If you're looking for [tulips] and you refine results to “white,” you really want images in which the tulips themselves are white—not the surrounding image. It takes some heavy-duty algorithmic wizardry and processing power for a search engine to understand what the items of interest are in all the images out there.
Those are just a few of the technologies we've built to make Google Images more useful. Meanwhile, the quantity and variety of images on the web has ballooned since 2001, and images have become one of the most popular types of content people search for. So over the next few days we're rolling out an update to Google Images to match the scope and beauty of this fast-growing visual web, and to bring to the surface some of the powerful technology behind Images.
Here's what's new in this refreshed design of Google Images:
Dense tiled layout designed to make it easy to look at lots of images at once. We want to get the app out of the way so you can find what you're really looking for.
Instant scrolling between pages, without letting you get lost in the images. You can now get up to 1,000 images, all in one scrolling page. And we'll show small, unobtrusive page numbers so you don't lose track of where you are.
Larger thumbnail previews on the results page, designed for modern browsers and high-res screens.
A hover pane that appears when you mouse over a given thumbnail image, giving you a larger preview, more info about the image and other image-specific features such as “Similar images.”
Once you click on an image, you're taken to a new landing page that displays a large image in context, with the website it's hosted on visible right behind it. Click anywhere outside the image, and you're right in the original page where you can learn more about the source and context.
Optimized keyboard navigation for faster scrolling through many pages, taking advantage of standard web keyboard shortcuts such as Page Up / Page Down. It's all about getting you to the info you need quickly, so you can get on with actually building that treehouse or buying those flowers.
Apple's not really ready to say it's sorry about the iPhone 4 antenna design, but it is willing to give all you darn squeaky wheels free cases for your trouble. Since Apple can't build its own Bumpers fast enough, it will give you a few options and let you decide, then send it your way for free as long as you purchased the phone before September 30th. Not good enough for you? Well, if you already bought a bumper from Apple you'll get a refund, and you can also return your phone for a full refund within 30 days as long as it's unharmed.
This solution comes at the end of 22 days of Apple engineers "working their butts off," according to Steve, with "physics" ultimately being pinned as the main culprit. Apple claims you can replicate the left-handed "death grip" bar-dropping problem on the BlackBerry Bold 9700, HTC Droid Eris, and Samsung Omnia II, and that "phones aren't perfect." Steve also claims that only 0.55% of people who bought the iPhone 4 have called into AppleCare to complain about the antenna, and the phone has a 1.7% return rate at AT&T, compared to 6% with the 3GS, though he would cop to a slight increase in dropped calls over the iPhone 3GS. For this Steve has what he confesses to be a pet theory: that 3GS users were using the case they had from the 3G, and therefore weren't met with the horrible reality of a naked, call dropping handset. Hence the free case solution, which will probably satisfy some, infuriate others, and never even blip onto the radar of many of the massive horde of consumers that's devoured this product in unprecedented numbers.
Update: Our own Richard Lai just waltzed down to the Regent Street Apple Store in London with his iPhone Bumper receipt in hand. A few minutes later he left with cold, hard cash, and kept the Bumper to boot. Seems as if the refund effort is a go, at least over in the UK.
Update 2: We've heard from several tipsters saying Apple no longer does Bumper refunds at its stores; customers will now have to make an online claim instead. Looks like we got super lucky.
If you have ever received an instant message, text message, or any text-based chat message that seemed to be written in a foreign language, this Webopedia Quick Reference will help you decipher the text chat lingo by providing the definitions to more than 1,300 chat, text message, and Twitter abbreviations.
With the popularity and rise in real-time text-based communications, such as Facebook, Twitter, instant messaging, e-mail, Internet and online gaming services, chat rooms, discussion boards and mobile phone text messaging (SMS), came the emergence of a new language tailored to the immediacy and compactness of these new communication media.
While it does seem incredible that there are so many chat abbreviations, remember that different chat abbreviations are used by different groups of people when communicating online. Some of the following chat abbreviations may be familiar to you, while others may be foreign because they are used by a group of people with different online interests and hobbies than your own. For example, people playing online games are likely to use chat abbreviations that are different than those used by someone running a financial blog updating their Twitter status.
Twitter is a free microblog, or social messaging tool that lets people stay connected through brief text message updates up to 140 characters in length. Twitter is based on you answering the question "What are you doing?" You then post thoughts, observations and goings-on during the day in answer to that question. Your update is posted on your Twitter profile page through SMS text messaging, the Twitter Web site, instant messaging, RSS, e-mail or through other social applications and sites, such as Facebook.
As with any new social medium, there is an entire vocabulary that users of the Twitter service adopt. Many of the new lingo Twitter-based terms and phrases are used to describe the collection of people who use the service, while other terms are used in reference to describe specific functions and features of the service itself. Also, there are a number of "chat terms," which are basically shorthand abbreviations that users often include in their tweets. Lastly, our guide also provides descriptions to a number of Twitter tools and applications that you can use to enhance your Twitter experience.
Here are definitions to more than 100 Twitter-related abbreviations, words, phrases, and tools that are associated with the Twitter microblogging service. If you know of a Twitter slang term or application name that is not included in our Twitter Dictionary, please let us know.

View file

@ -0,0 +1,192 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.tests;
import android.content.Context;
import android.test.AndroidTestCase;
import android.text.TextUtils;
import android.util.Log;
import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.WordComposer;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.util.List;
/**
 * Test helper that builds a {@link Suggest} instance from one or more raw dictionary
 * resources and offers convenience predicates over the suggestions it returns.
 */
public class SuggestHelper {
    private Suggest mSuggest;
    private final String TAG;

    /**
     * Loads the given raw resources, concatenated in order, into a single direct buffer and
     * creates the suggestion engine from it.
     *
     * @param tag log tag used for every message emitted by this helper
     * @param context context whose resources contain the dictionary parts
     * @param resId raw resource ids of the dictionary parts, in the order they must be merged
     * @throws IllegalStateException if the dictionary could not be read completely
     */
    public SuggestHelper(String tag, Context context, int[] resId) {
        TAG = tag;
        // Declared outside the try block so the finally clause can close whatever was opened.
        final InputStream[] res = new InputStream[resId.length];
        try {
            // merging separated dictionary into one if dictionary is separated
            int total = 0;
            for (int i = 0; i < resId.length; i++) {
                res[i] = context.getResources().openRawResource(resId[i]);
                total += res[i].available();
            }

            ByteBuffer byteBuffer =
                    ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
            int got = 0;
            for (int i = 0; i < resId.length; i++) {
                got += Channels.newChannel(res[i]).read(byteBuffer);
            }
            if (got != total) {
                Log.w(TAG, "Read " + got + " bytes, expected " + total);
            } else {
                mSuggest = new Suggest(context, byteBuffer);
                Log.i(TAG, "Created mSuggest " + total + " bytes");
            }
        } catch (IOException e) {
            Log.w(TAG, "No available memory for binary dictionary");
        } finally {
            // Close each stream independently: a failure on one must not leak the others,
            // and slots that were never opened are still null.
            for (int i = 0; i < res.length; i++) {
                if (res[i] == null) {
                    continue;
                }
                try {
                    res[i].close();
                } catch (IOException e) {
                    Log.w(TAG, "Failed to close input stream");
                }
            }
        }
        if (mSuggest == null) {
            // Fail with an explicit message instead of a bare NullPointerException below.
            throw new IllegalStateException("Could not load the binary dictionary for " + tag);
        }
        mSuggest.setAutoTextEnabled(false);
        mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
    }

    /**
     * Simulates typing the given text: every lowercase ASCII letter is added together with
     * its row from {@link #adjacents}; any other character is added with itself as the only
     * code.
     */
    private WordComposer createWordComposer(CharSequence s) {
        WordComposer word = new WordComposer();
        for (int i = 0; i < s.length(); i++) {
            final char c = s.charAt(i);
            int[] codes;
            // If it's not a lowercase letter, don't find adjacent letters
            if (c < 'a' || c > 'z') {
                codes = new int[] { c };
            } else {
                codes = adjacents[c - 'a'];
            }
            word.add(c, codes);
        }
        return word;
    }

    /** Logs the title followed by every suggestion; debugging aid only. */
    private void showList(String title, List<CharSequence> suggestions) {
        Log.i(TAG, title);
        for (int i = 0; i < suggestions.size(); i++) {
            Log.i(title, suggestions.get(i) + ", ");
        }
    }

    /**
     * Returns true when the entry at index 1 of the suggestion list equals the given word.
     * Index 0 is deliberately not considered (presumably it holds the word as typed).
     */
    private boolean isDefaultSuggestion(List<CharSequence> suggestions, CharSequence word) {
        return suggestions.size() > 1 && TextUtils.equals(suggestions.get(1), word);
    }

    /** Returns true when {@code expected} is the first alternative offered for {@code typed}. */
    boolean isDefaultSuggestion(CharSequence typed, CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
        return isDefaultSuggestion(suggestions, expected);
    }

    /**
     * Like {@link #isDefaultSuggestion(CharSequence, CharSequence)}, but additionally requires
     * the engine to report a minimal correction.
     */
    boolean isDefaultCorrection(CharSequence typed, CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
        return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
    }

    /** Returns true when {@code expected} appears anywhere in the suggestions after index 0. */
    boolean isASuggestion(CharSequence typed, CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
        for (int i = 1; i < suggestions.size(); i++) {
            if (TextUtils.equals(suggestions.get(i), expected)) return true;
        }
        return false;
    }

    /**
     * When a previous word exists and more than one character has been typed, first queries
     * the engine with only the first typed character and the previous word, mimicking the
     * request that would have been issued after the first keystroke.
     */
    private void getBigramSuggestions(CharSequence previous, CharSequence typed) {
        if (!TextUtils.isEmpty(previous) && (typed.length() > 1)) {
            WordComposer firstChar = createWordComposer(typed.charAt(0) + "");
            mSuggest.getSuggestions(null, firstChar, false, previous);
        }
    }

    /**
     * Returns true when {@code expected} is the first alternative offered for {@code typed}
     * given the previous word.
     */
    boolean isDefaultNextSuggestion(CharSequence previous, CharSequence typed,
            CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        getBigramSuggestions(previous, typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
        return isDefaultSuggestion(suggestions, expected);
    }

    /**
     * Like {@link #isDefaultNextSuggestion}, but additionally requires the engine to report a
     * minimal correction.
     */
    boolean isDefaultNextCorrection(CharSequence previous, CharSequence typed,
            CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        getBigramSuggestions(previous, typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
        return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
    }

    /**
     * Returns true when {@code expected} appears anywhere in the suggestions after index 0,
     * given the previous word.
     */
    boolean isASuggestion(CharSequence previous, CharSequence typed,
            CharSequence expected) {
        WordComposer word = createWordComposer(typed);
        getBigramSuggestions(previous, typed);
        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
        for (int i = 1; i < suggestions.size(); i++) {
            if (TextUtils.equals(suggestions.get(i), expected)) return true;
        }
        return false;
    }

    /** Returns whether the engine considers the given word valid. */
    boolean isValid(CharSequence typed) {
        return mSuggest.isValidWord(typed);
    }

    // Codes passed along with each lowercase letter, indexed by (letter - 'a'). Every row
    // starts with the letter itself, lists its neighboring keys and ends with -1.
    final int[][] adjacents = {
        {'a','s','w','q',-1},
        {'b','h','v','n','g','j',-1},
        {'c','v','f','x','g',-1}, // -1 terminator added for consistency with the other rows
        {'d','f','r','e','s','x',-1},
        {'e','w','r','s','d',-1},
        {'f','g','d','c','t','r',-1},
        {'g','h','f','y','t','v',-1},
        {'h','j','u','g','b','y',-1},
        {'i','o','u','k',-1},
        {'j','k','i','h','u','n',-1},
        {'k','l','o','j','i','m',-1},
        {'l','k','o','p',-1},
        {'m','k','n','l',-1},
        {'n','m','j','k','b',-1},
        {'o','p','i','l',-1},
        {'p','o',-1},
        {'q','w',-1},
        {'r','t','e','f',-1},
        {'s','d','e','w','a','z',-1},
        {'t','y','r',-1},
        {'u','y','i','h','j',-1},
        {'v','b','g','c','h',-1},
        {'w','e','q',-1},
        {'x','c','d','z','f',-1},
        {'y','u','t','h','g',-1},
        {'z','s','x','a','d',-1},
    };
}

View file

@ -0,0 +1,127 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.tests;
import android.test.AndroidTestCase;
import android.util.Log;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
/**
 * Measures how many keystrokes the suggestion engine saves on a sample text, with and without
 * bigram information, and checks that bigrams improve the result.
 */
public class SuggestPerformanceTests extends AndroidTestCase {
    private static final String TAG = "SuggestPerformanceTests";

    // A token consisting of word characters followed by exactly one trailing mark. Compiled
    // once instead of on every token. Note that '|' inside the character class is a literal,
    // so a trailing '|' matches as well.
    private static final Pattern TRAILING_MARK =
            Pattern.compile("[\\w]*[\\.|?|!|*|@|&|/|:|;]");

    private String mTestText;
    private SuggestHelper sh;

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        // TODO Figure out a way to directly using the dictionary rather than copying it over

        // For testing with real dictionary, TEMPORARILY COPY main dictionary into test directory.
        // DO NOT SUBMIT real dictionary under test directory.
        //int[] resId = new int[] { R.raw.main0, R.raw.main1, R.raw.main2 };

        int[] resId = new int[] { R.raw.test };

        sh = new SuggestHelper(TAG, getTestContext(), resId);
        loadString();
    }

    /**
     * Reads the raw test text resource into {@link #mTestText}, joining its lines with single
     * spaces. On failure the error is logged and {@link #mTestText} stays null.
     */
    private void loadString() {
        BufferedReader reader = null;
        try {
            InputStream is = getTestContext().getResources().openRawResource(R.raw.testtext);
            // The text contains non-ASCII punctuation, so do not rely on the default charset.
            reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append(' ');
            }
            mTestText = sb.toString();
        } catch (Exception e) {
            Log.e(TAG, "Failed to load the test text", e);
        } finally {
            if (reader != null) {
                try {
                    // Closing the reader also closes the underlying resource stream.
                    reader.close();
                } catch (java.io.IOException e) {
                    Log.w(TAG, "Failed to close the test text stream", e);
                }
            }
        }
    }

    /************************** Helper functions ************************/

    /**
     * Returns the number of characters that have to be typed before {@code currentWord} is
     * offered as the default: as the default suggestion after the first character, or as the
     * default correction after any longer prefix. Returns the full word length when no proper
     * prefix is enough.
     */
    private int lookForSuggestion(String prevWord, String currentWord) {
        for (int i = 1; i < currentWord.length(); i++) {
            final String prefix = currentWord.substring(0, i);
            final boolean found = (i == 1)
                    ? sh.isDefaultNextSuggestion(prevWord, prefix, currentWord)
                    : sh.isDefaultNextCorrection(prevWord, prefix, currentWord);
            if (found) {
                return i;
            }
        }
        return currentWord.length();
    }

    /**
     * Simulates typing the whole test text and logs the statistics.
     *
     * @param withBigrams whether the previous word is handed to the suggestion engine
     * @return the percentage of characters (excluding whitespace) that did not have to be typed
     */
    private double runText(boolean withBigrams) {
        StringTokenizer st = new StringTokenizer(mTestText);
        String prevWord = null;
        int typeCount = 0;
        int characterCount = 0; // without space
        int wordCount = 0;
        while (st.hasMoreTokens()) {
            String currentWord = st.nextToken();
            boolean endCheck = false;
            if (TRAILING_MARK.matcher(currentWord).matches()) {
                // Strip the trailing mark; such a word is not used as the previous word for
                // the token that follows it.
                currentWord = currentWord.substring(0, currentWord.length() - 1);
                endCheck = true;
            }
            if (withBigrams && prevWord != null) {
                typeCount += lookForSuggestion(prevWord, currentWord);
            } else {
                typeCount += lookForSuggestion(null, currentWord);
            }
            characterCount += currentWord.length();
            if (!endCheck) prevWord = currentWord;
            wordCount++;
        }

        double result = (double) (characterCount - typeCount) / characterCount * 100;
        if (withBigrams) {
            Log.i(TAG, "with bigrams -> " + result + " % saved!");
        } else {
            Log.i(TAG, "without bigrams -> " + result + " % saved!");
        }
        Log.i(TAG, "\ttotal number of words: " + wordCount);
        Log.i(TAG, "\ttotal number of characters: " + mTestText.length());
        Log.i(TAG, "\ttotal number of characters without space: " + characterCount);
        Log.i(TAG, "\ttotal number of characters typed: " + typeCount);
        return result;
    }

    /************************** Performance Tests ************************/
    /**
     * Compare the Suggest with and without bigram
     * Check the log for detail
     */
    public void testSuggestPerformance() {
        // Report a missing text explicitly rather than as a NullPointerException in runText.
        assertNotNull("test text could not be loaded", mTestText);
        assertTrue(runText(false) < runText(true));
    }
}

View file

@ -1,171 +1,33 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.tests;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.util.List;
import android.content.Context;
import android.test.AndroidTestCase;
import android.text.TextUtils;
import android.util.Log;
import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.WordComposer;
public class SuggestTests extends AndroidTestCase {
private static final String TAG = "SuggestTests";
private Suggest mSuggest;
int[][] adjacents = {
{'a','s','w','q',-1},
{'b','h','v','n','g','j',-1},
{'c','v','f','x','g',},
{'d','f','r','e','s','x',-1},
{'e','w','r','s','d',-1},
{'f','g','d','c','t','r',-1},
{'g','h','f','y','t','v',-1},
{'h','j','u','g','b','y',-1},
{'i','o','u','k',-1},
{'j','k','i','h','u','n',-1},
{'k','l','o','j','i','m',-1},
{'l','k','o','p',-1},
{'m','k','n','l',-1},
{'n','m','j','k','b',-1},
{'o','p','i','l',-1},
{'p','o',-1},
{'q','w',-1},
{'r','t','e','f',-1},
{'s','d','e','w','a','z',-1},
{'t','y','r',-1},
{'u','y','i','h','j',-1},
{'v','b','g','c','h',-1},
{'w','e','q',-1},
{'x','c','d','z','f',-1},
{'y','u','t','h','g',-1},
{'z','s','x','a','d',-1},
};
private SuggestHelper sh;
@Override
protected void setUp() {
final Context context = getTestContext();
InputStream is = context.getResources().openRawResource(R.raw.test);
Log.i(TAG, "Stream type is " + is);
try {
int avail = is.available();
if (avail > 0) {
ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(avail).order(ByteOrder.nativeOrder());
int got = Channels.newChannel(is).read(byteBuffer);
if (got != avail) {
Log.e(TAG, "Read " + got + " bytes, expected " + avail);
} else {
mSuggest = new Suggest(context, byteBuffer);
Log.i(TAG, "Created mSuggest " + avail + " bytes");
}
}
} catch (IOException ioe) {
Log.w(TAG, "No available size for binary dictionary");
}
mSuggest.setAutoTextEnabled(false);
mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
}
/************************** Helper functions ************************/
private WordComposer createWordComposer(CharSequence s) {
WordComposer word = new WordComposer();
for (int i = 0; i < s.length(); i++) {
final char c = s.charAt(i);
int[] codes;
// If it's not a lowercase letter, don't find adjacent letters
if (c < 'a' || c > 'z') {
codes = new int[] { c };
} else {
codes = adjacents[c - 'a'];
}
word.add(c, codes);
}
return word;
}
private void showList(String title, List<CharSequence> suggestions) {
Log.i(TAG, title);
for (int i = 0; i < suggestions.size(); i++) {
Log.i(title, suggestions.get(i) + ", ");
}
}
private boolean isDefaultSuggestion(List<CharSequence> suggestions, CharSequence word) {
// Check if either the word is what you typed or the first alternative
return suggestions.size() > 0 &&
(/*TextUtils.equals(suggestions.get(0), word) || */
(suggestions.size() > 1 && TextUtils.equals(suggestions.get(1), word)));
}
private boolean isDefaultSuggestion(CharSequence typed, CharSequence expected) {
WordComposer word = createWordComposer(typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
return isDefaultSuggestion(suggestions, expected);
}
private void getBigramSuggestions(CharSequence previous, CharSequence typed) {
if(!TextUtils.isEmpty(previous) && (typed.length() > 1)) {
WordComposer firstChar = createWordComposer(typed.charAt(0) + "");
mSuggest.getSuggestions(null, firstChar, false, previous);
}
}
private boolean isDefaultNextSuggestion(CharSequence previous, CharSequence typed,
CharSequence expected) {
WordComposer word = createWordComposer(typed);
getBigramSuggestions(previous, typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
return isDefaultSuggestion(suggestions, expected);
}
private boolean isDefaultCorrection(CharSequence typed, CharSequence expected) {
WordComposer word = createWordComposer(typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
}
private boolean isDefaultNextCorrection(CharSequence previous, CharSequence typed,
CharSequence expected) {
WordComposer word = createWordComposer(typed);
getBigramSuggestions(previous, typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
for(int i=0;i<suggestions.size();i++) {
Log.i(TAG,i+" "+suggestions.get(i));
}
return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
}
private boolean isASuggestion(CharSequence typed, CharSequence expected) {
WordComposer word = createWordComposer(typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
for (int i = 1; i < suggestions.size(); i++) {
if (TextUtils.equals(suggestions.get(i), expected)) return true;
}
return false;
}
private boolean isASuggestion(CharSequence previous, CharSequence typed,
CharSequence expected) {
WordComposer word = createWordComposer(typed);
getBigramSuggestions(previous, typed);
List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
for (int i = 1; i < suggestions.size(); i++) {
if (TextUtils.equals(suggestions.get(i), expected)) return true;
}
return false;
}
private boolean isValid(CharSequence typed) {
return mSuggest.isValidWord(typed);
int[] resId = new int[] { R.raw.test };
sh = new SuggestHelper(TAG, getTestContext(), resId);
}
/************************** Tests ************************/
@ -174,87 +36,87 @@ public class SuggestTests extends AndroidTestCase {
* Tests for simple completions of one character.
*/
public void testCompletion1char() {
assertTrue(isDefaultSuggestion("peopl", "people"));
assertTrue(isDefaultSuggestion("abou", "about"));
assertTrue(isDefaultSuggestion("thei", "their"));
assertTrue(sh.isDefaultSuggestion("peopl", "people"));
assertTrue(sh.isDefaultSuggestion("abou", "about"));
assertTrue(sh.isDefaultSuggestion("thei", "their"));
}
/**
* Tests for simple completions of two characters.
*/
public void testCompletion2char() {
assertTrue(isDefaultSuggestion("peop", "people"));
assertTrue(isDefaultSuggestion("calli", "calling"));
assertTrue(isDefaultSuggestion("busine", "business"));
assertTrue(sh.isDefaultSuggestion("peop", "people"));
assertTrue(sh.isDefaultSuggestion("calli", "calling"));
assertTrue(sh.isDefaultSuggestion("busine", "business"));
}
/**
* Tests for proximity errors.
*/
public void testProximityPositive() {
assertTrue(isDefaultSuggestion("peiple", "people"));
assertTrue(isDefaultSuggestion("peoole", "people"));
assertTrue(isDefaultSuggestion("pwpple", "people"));
assertTrue(sh.isDefaultSuggestion("peiple", "people"));
assertTrue(sh.isDefaultSuggestion("peoole", "people"));
assertTrue(sh.isDefaultSuggestion("pwpple", "people"));
}
/**
* Tests for proximity errors - negative, when the error key is not near.
*/
public void testProximityNegative() {
assertFalse(isDefaultSuggestion("arout", "about"));
assertFalse(isDefaultSuggestion("ire", "are"));
assertFalse(sh.isDefaultSuggestion("arout", "about"));
assertFalse(sh.isDefaultSuggestion("ire", "are"));
}
/**
* Tests for checking if apostrophes are added automatically.
*/
public void testApostropheInsertion() {
assertTrue(isDefaultSuggestion("im", "I'm"));
assertTrue(isDefaultSuggestion("dont", "don't"));
assertTrue(sh.isDefaultSuggestion("im", "I'm"));
assertTrue(sh.isDefaultSuggestion("dont", "don't"));
}
/**
* Test to make sure apostrophed word is not suggested for an apostrophed word.
*/
public void testApostrophe() {
// Input and expected word are the identical string; the check asserts that it is NOT
// reported as the default suggestion. Checked through the bare helper and through `sh`.
assertFalse(isDefaultSuggestion("don't", "don't"));
assertFalse(sh.isDefaultSuggestion("don't", "don't"));
}
/**
* Tests for suggestion of capitalized version of a word.
*/
public void testCapitalization() {
// All inputs are lower-case; the expected default suggestion starts with a capital.
// "sundat" also has a wrong final letter, so that pair needs both a letter fix and
// a case change.
// The same pairs are checked through the bare helper and again through `sh`.
assertTrue(isDefaultSuggestion("i'm", "I'm"));
assertTrue(isDefaultSuggestion("sunday", "Sunday"));
assertTrue(isDefaultSuggestion("sundat", "Sunday"));
assertTrue(sh.isDefaultSuggestion("i'm", "I'm"));
assertTrue(sh.isDefaultSuggestion("sunday", "Sunday"));
assertTrue(sh.isDefaultSuggestion("sundat", "Sunday"));
}
/**
* Tests to see if more than one completion is provided for certain prefixes.
*/
public void testMultipleCompletions() {
// Uses isASuggestion rather than isDefaultSuggestion, so several different words can be
// asserted for one prefix ("com": 2 words, "th": 4 words).
// Presumably isASuggestion matches anywhere in the returned suggestion list — TODO confirm.
// The same pairs are checked through the bare helper and again through `sh`.
assertTrue(isASuggestion("com", "come"));
assertTrue(isASuggestion("com", "company"));
assertTrue(isASuggestion("th", "the"));
assertTrue(isASuggestion("th", "that"));
assertTrue(isASuggestion("th", "this"));
assertTrue(isASuggestion("th", "they"));
assertTrue(sh.isASuggestion("com", "come"));
assertTrue(sh.isASuggestion("com", "company"));
assertTrue(sh.isASuggestion("th", "the"));
assertTrue(sh.isASuggestion("th", "that"));
assertTrue(sh.isASuggestion("th", "this"));
assertTrue(sh.isASuggestion("th", "they"));
}
/**
* Tests that the suggestion engine recognizes zero-frequency words as valid words.
*/
public void testZeroFrequencyAccepted() {
// "yikes" must be valid, while "yike" (the same word minus its last letter) must not be.
// Checked through the bare helper and again through `sh`.
assertTrue(isValid("yikes"));
assertFalse(isValid("yike"));
assertTrue(sh.isValid("yikes"));
assertFalse(sh.isValid("yike"));
}
/**
* Tests to make sure that zero frequency words are not suggested as completions.
*/
public void testZeroFrequencySuggestionsNegative() {
// Each input is a prefix of the word that must NOT be offered as a suggestion.
// Checked through the bare helper and again through `sh`.
assertFalse(isASuggestion("yike", "yikes"));
assertFalse(isASuggestion("what", "whatcha"));
assertFalse(sh.isASuggestion("yike", "yikes"));
assertFalse(sh.isASuggestion("what", "whatcha"));
}
/**
@ -262,16 +124,16 @@ public class SuggestTests extends AndroidTestCase {
* and not considered corrections, in some cases.
*/
public void testTooLargeEditDistance() {
// In both pairs the input has the same length as the target word but differs from it
// in every letter position.
// The first pair asserts the word is not among the suggestions at all (isASuggestion);
// the second only asserts it is not the default correction (isDefaultCorrection).
// Checked through the bare helpers and again through `sh`.
assertFalse(isASuggestion("sniyr", "about"));
assertFalse(isDefaultCorrection("rjw", "the"));
assertFalse(sh.isASuggestion("sniyr", "about"));
assertFalse(sh.isDefaultCorrection("rjw", "the"));
}
/**
* Make sure isValid is case-sensitive.
* Make sure sh.isValid is case-sensitive.
*/
public void testValidityCaseSensitivity() {
// The capitalized form must be valid and the all-lower-case form must not be.
// Checked through the bare helper and again through `sh`.
assertTrue(isValid("Sunday"));
assertFalse(isValid("sunday"));
assertTrue(sh.isValid("Sunday"));
assertFalse(sh.isValid("sunday"));
}
/**
@ -279,11 +141,11 @@ public class SuggestTests extends AndroidTestCase {
*/
public void testAccents() {
// Unaccented input must be corrected to the accented word. Expected strings use
// Unicode escapes: \u00F1 is n-with-tilde, \u00ED is i-with-acute.
// Each pair appears as a bare call followed by the same check through `sh`.
// ni<LATIN SMALL LETTER N WITH TILDE>o
assertTrue(isDefaultCorrection("nino", "ni\u00F1o"));
assertTrue(sh.isDefaultCorrection("nino", "ni\u00F1o"));
// ni<LATIN SMALL LETTER N WITH TILDE>o
// (same target word; here the input has 'm' in the accented letter's position)
assertTrue(isDefaultCorrection("nimo", "ni\u00F1o"));
assertTrue(sh.isDefaultCorrection("nimo", "ni\u00F1o"));
// Mar<LATIN SMALL LETTER I WITH ACUTE>a
// (the expected word is also capitalized, unlike the input)
assertTrue(isDefaultCorrection("maria", "Mar\u00EDa"));
assertTrue(sh.isDefaultCorrection("maria", "Mar\u00EDa"));
}
/**
@ -291,20 +153,20 @@ public class SuggestTests extends AndroidTestCase {
* and don't show any when there aren't any
*/
public void testBigramsAtFirstChar() {
// Three-argument helper form. Presumably the arguments are (previous word, typed
// characters, expected word) — TODO confirm against the helper signatures.
// Here the typed part is always a single character.
// NOTE(review): the first three bare calls use isDefaultNextCorrection, while their `sh`
// counterparts below use isDefaultNextSuggestion; the two groups are not the same check.
assertTrue(isDefaultNextCorrection("about", "p", "part"));
assertTrue(isDefaultNextCorrection("I'm", "a", "about"));
assertTrue(isDefaultNextCorrection("about", "b", "business"));
assertTrue(isASuggestion("about", "b", "being"));
// Negative case: "business" does not start with the typed "p".
assertFalse(isDefaultNextSuggestion("about", "p", "business"));
assertTrue(sh.isDefaultNextSuggestion("about", "p", "part"));
assertTrue(sh.isDefaultNextSuggestion("I'm", "a", "about"));
assertTrue(sh.isDefaultNextSuggestion("about", "b", "business"));
assertTrue(sh.isASuggestion("about", "b", "being"));
assertFalse(sh.isDefaultNextSuggestion("about", "p", "business"));
}
/**
* Make sure bigrams score affects the original score
*/
public void testBigramsScoreEffect() {
// Each typed prefix is checked twice with different expectations: alone ("pa" -> "page",
// "sa" -> "said") and with a first argument supplied ("about" + "pa" -> "part",
// "from" + "sa" -> "same"). Presumably that first argument is the previous word — TODO
// confirm against the helper.
// The four checks run through the bare helpers and again through `sh`.
assertTrue(isDefaultCorrection("pa", "page"));
assertTrue(isDefaultNextCorrection("about", "pa", "part"));
assertTrue(isDefaultCorrection("sa", "said"));
assertTrue(isDefaultNextCorrection("from", "sa", "same"));
assertTrue(sh.isDefaultCorrection("pa", "page"));
assertTrue(sh.isDefaultNextCorrection("about", "pa", "part"));
assertTrue(sh.isDefaultCorrection("sa", "said"));
assertTrue(sh.isDefaultNextCorrection("from", "sa", "same"));
}
}