diff --git a/java/Android.mk b/java/Android.mk
index a6dc49b2b..172f35a89 100755
--- a/java/Android.mk
+++ b/java/Android.mk
@@ -18,7 +18,7 @@ LOCAL_STATIC_JAVA_LIBRARIES := android-common
# com.google.android.inputmethod.latin in the LatinIME sandbox.
LOCAL_AAPT_FLAGS := --custom-package com.android.inputmethod.latin
-LOCAL_SDK_VERSION := current
+LOCAL_SDK_VERSION := 8
LOCAL_PROGUARD_FLAGS := -include $(LOCAL_PATH)/proguard.flags
diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml
index f2d823f0b..6c7aa8324 100644
--- a/java/res/values/strings.xml
+++ b/java/res/values/strings.xml
@@ -86,6 +86,11 @@
Spacebar and punctuation automatically insert highlighted word
+
+ Bigram Suggestions
+
+ Use previous word to improve suggestion
+
- None
diff --git a/java/res/xml/prefs.xml b/java/res/xml/prefs.xml
index 10e3998f9..12a1ed305 100644
--- a/java/res/xml/prefs.xml
+++ b/java/res/xml/prefs.xml
@@ -98,10 +98,18 @@
android:defaultValue="@bool/enable_autocorrect"
android:dependency="show_suggestions"
/>
-
-
-
+
+
+ 0) {
+ callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j],
+ DataType.BIGRAM);
+ }
+ }
+ }
+
@Override
public void getWords(final WordComposer codes, final WordCallback callback,
int[] nextLettersFrequencies) {
@@ -168,7 +196,7 @@ public class BinaryDictionary extends Dictionary {
len++;
}
if (len > 0) {
- callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId);
+ callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId, DataType.UNIGRAM);
}
}
}
diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java
index e38a32fa1..a02edeee5 100644
--- a/java/src/com/android/inputmethod/latin/Dictionary.java
+++ b/java/src/com/android/inputmethod/latin/Dictionary.java
@@ -21,7 +21,9 @@ package com.android.inputmethod.latin;
* strokes.
*/
abstract public class Dictionary {
-
+
+ protected static final int MAX_WORD_LENGTH = 48;
+
/**
* Whether or not to replicate the typed word in the suggested list, even if it's valid.
*/
@@ -31,7 +33,11 @@ abstract public class Dictionary {
* The weight to give to a word if it's length is the same as the number of typed characters.
*/
protected static final int FULL_WORD_FREQ_MULTIPLIER = 2;
-
+
+ public static enum DataType {
+ UNIGRAM, BIGRAM
+ }
+
/**
* Interface to be implemented by classes requesting words to be fetched from the dictionary.
* @see #getWords(WordComposer, WordCallback)
@@ -46,9 +52,11 @@ abstract public class Dictionary {
* @param frequency the frequency of occurence. This is normalized between 1 and 255, but
* can exceed those limits
* @param dicTypeId of the dictionary where word was from
+ * @param dataType tells type of this data
* @return true if the word was added, false if no more words are required
*/
- boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, int dicTypeId);
+ boolean addWord(char[] word, int wordOffset, int wordLength, int frequency, int dicTypeId,
+ DataType dataType);
}
/**
@@ -65,6 +73,21 @@ abstract public class Dictionary {
abstract public void getWords(final WordComposer composer, final WordCallback callback,
int[] nextLettersFrequencies);
+ /**
+ * Searches for pairs in the bigram dictionary that matches the previous word and all the
+ * possible words following are added through the callback object.
+ * @param composer the key sequence to match
+ * @param callback the callback object to send possible word following previous word
+ * @param nextLettersFrequencies array of frequencies of next letters that could follow the
+ * word so far. For instance, "bracke" can be followed by "t", so array['t'] will have
+ * a non-zero value on returning from this method.
+ * Pass in null if you don't want the dictionary to look up next letters.
+ */
+ public void getBigrams(final WordComposer composer, final CharSequence previousWord,
+ final WordCallback callback, int[] nextLettersFrequencies) {
+ // empty base implementation
+ }
+
/**
* Checks if the given word occurs in the dictionary
* @param word the word to search for. The search should be case-insensitive.
diff --git a/java/src/com/android/inputmethod/latin/EditingUtil.java b/java/src/com/android/inputmethod/latin/EditingUtil.java
index 7571f1daf..5133c60ca 100644
--- a/java/src/com/android/inputmethod/latin/EditingUtil.java
+++ b/java/src/com/android/inputmethod/latin/EditingUtil.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin;
+import java.util.regex.Pattern;
+
import android.view.inputmethod.ExtractedText;
import android.view.inputmethod.ExtractedTextRequest;
import android.view.inputmethod.InputConnection;
@@ -24,6 +26,11 @@ import android.view.inputmethod.InputConnection;
* Utility methods to deal with editing text through an InputConnection.
*/
public class EditingUtil {
+ /**
+ * Number of characters we want to look back in order to identify the previous word
+ */
+ public static final int LOOKBACK_CHARACTER_NUM = 15;
+
private EditingUtil() {};
/**
@@ -175,4 +182,13 @@ public class EditingUtil {
private static boolean isWhitespace(int code, String whitespace) {
return whitespace.contains(String.valueOf((char) code));
}
+
+ private static final Pattern spaceRegex = Pattern.compile("\\s+");
+
+ public static CharSequence getPreviousWord(InputConnection connection) {
+ //TODO: Should fix this. This could be slow!
+ CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
+ String[] w = spaceRegex.split(prev);
+ return (w.length >= 2) ? w[w.length-2] : null;
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index e2a812796..d8a9547c1 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -266,7 +266,8 @@ public class ExpandableDictionary extends Dictionary {
if (completion) {
word[depth] = c;
if (terminal) {
- if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId)) {
+ if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
+ DataType.UNIGRAM)) {
return;
}
// Add to frequency of next letters for predictive correction
@@ -304,7 +305,8 @@ public class ExpandableDictionary extends Dictionary {
|| !same(word, depth + 1, codes.getTypedWord())) {
int finalFreq = freq * snr * addedAttenuation;
if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
- callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId);
+ callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
+ DataType.UNIGRAM);
}
}
if (children != null) {
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 2cc92e133..2527c81fa 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -48,8 +48,8 @@ import android.view.HapticFeedbackConstants;
import android.view.KeyEvent;
import android.view.LayoutInflater;
import android.view.View;
-import android.view.ViewParent;
import android.view.ViewGroup;
+import android.view.ViewParent;
import android.view.Window;
import android.view.WindowManager;
import android.view.inputmethod.CompletionInfo;
@@ -88,6 +88,7 @@ public class LatinIME extends InputMethodService
private static final String PREF_QUICK_FIXES = "quick_fixes";
private static final String PREF_SHOW_SUGGESTIONS = "show_suggestions";
private static final String PREF_AUTO_COMPLETE = "auto_complete";
+ private static final String PREF_BIGRAM_SUGGESTIONS = "bigram_suggestion";
private static final String PREF_VOICE_MODE = "voice_mode";
// Whether or not the user has used voice input before (and thus, whether to show the
@@ -186,6 +187,7 @@ public class LatinIME extends InputMethodService
private boolean mAutoSpace;
private boolean mJustAddedAutoSpace;
private boolean mAutoCorrectEnabled;
+ private boolean mBigramSuggestionEnabled;
private boolean mAutoCorrectOn;
private boolean mCapsLock;
private boolean mPasswordText;
@@ -713,12 +715,14 @@ public class LatinIME extends InputMethodService
// TODO: Uncomment this block when we enable re-editing feature
// If a word is selected
- /*if ((candidatesStart == candidatesEnd || newSelStart != oldSelStart)
+ /*if (isPredictionOn() && mJustRevertedSeparator == null
+ && (candidatesStart == candidatesEnd || newSelStart != oldSelStart)
&& (newSelStart < newSelEnd - 1 || (!mPredicting))
&& !mVoiceInputHighlighted) {
- abortCorrection(false);
if (isCursorTouchingWord() || mLastSelectionStart < mLastSelectionEnd) {
postUpdateOldSuggestions();
+ } else {
+ abortCorrection(false);
}
}*/
}
@@ -1113,6 +1117,8 @@ public class LatinIME extends InputMethodService
InputConnection ic = getCurrentInputConnection();
if (ic == null) return;
+ ic.beginBatchEdit();
+
if (mAfterVoiceInput) {
// Don't log delete if the user is pressing delete at
// the beginning of the text box (hence not deleting anything)
@@ -1145,6 +1151,7 @@ public class LatinIME extends InputMethodService
TextEntryState.backspace();
if (TextEntryState.getState() == TextEntryState.STATE_UNDO_COMMIT) {
revertLastWord(deleteChar);
+ ic.endBatchEdit();
return;
} else if (mEnteredText != null && sameAsTextBeforeCursor(ic, mEnteredText)) {
ic.deleteSurroundingText(mEnteredText.length(), 0);
@@ -1155,6 +1162,7 @@ public class LatinIME extends InputMethodService
}
}
mJustRevertedSeparator = null;
+ ic.endBatchEdit();
}
private void handleShift() {
@@ -1312,9 +1320,10 @@ public class LatinIME extends InputMethodService
mWord.reset();
return;
}
- TypedWordAlternatives entry = new TypedWordAlternatives(result, mWord);
- // Create a new WordComposer as the old one is being saved for later use
- mWord = new WordComposer(mWord);
+ // Make a copy of the CharSequence, since it is/could be a mutable CharSequence
+ final String resultCopy = result.toString();
+ TypedWordAlternatives entry = new TypedWordAlternatives(resultCopy,
+ new WordComposer(mWord));
mWordHistory.add(entry);
}
@@ -1569,8 +1578,7 @@ public class LatinIME extends InputMethodService
}
private List getTypedSuggestions(WordComposer word) {
- List stringList = mSuggest.getSuggestions(
- mKeyboardSwitcher.getInputView(), word, false);
+ List stringList = mSuggest.getSuggestions(mKeyboardSwitcher.getInputView(), word, false, null);
return stringList;
}
@@ -1581,8 +1589,14 @@ public class LatinIME extends InputMethodService
}
private void showSuggestions(WordComposer word) {
- List stringList = mSuggest.getSuggestions(
- mKeyboardSwitcher.getInputView(), word, false);
+ //long startTime = System.currentTimeMillis(); // TIME MEASUREMENT!
+ // TODO Maybe need better way of retrieving previous word
+ CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
+ List stringList = mSuggest.getSuggestions(mKeyboardSwitcher.getInputView(), word, false,
+ prevWord);
+ //long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT!
+ //Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime));
+
int[] nextLettersFrequencies = mSuggest.getNextLettersFrequencies();
((LatinKeyboard) mKeyboardSwitcher.getInputView().getKeyboard()).setPreferredLetters(
@@ -1699,7 +1713,8 @@ public class LatinIME extends InputMethodService
// Fool the state watcher so that a subsequent backspace will not do a revert
TextEntryState.typedCharacter((char) KEYCODE_SPACE, true);
- if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion)) {
+ if (index == 0 && mCorrectionMode > 0 && !mSuggest.isValidWord(suggestion)
+ && !mSuggest.isValidWord(suggestion.toString().toLowerCase())) {
mCandidateView.showAddToDictionaryHint(suggestion);
}
if (ic != null) {
@@ -1713,9 +1728,9 @@ public class LatinIME extends InputMethodService
InputConnection ic = getCurrentInputConnection();
EditingUtil.Range range = new EditingUtil.Range();
String wordToBeReplaced = EditingUtil.getWordAtCursor(getCurrentInputConnection(),
- mWordSeparators, range).trim();
+ mWordSeparators, range);
if (!mWordToSuggestions.containsKey(wordToBeReplaced)) {
- wordToBeReplaced = wordToBeReplaced.toLowerCase();
+ wordToBeReplaced = wordToBeReplaced.toLowerCase();
}
if (mWordToSuggestions.containsKey(wordToBeReplaced)) {
List suggestions = mWordToSuggestions.get(wordToBeReplaced);
@@ -1743,9 +1758,6 @@ public class LatinIME extends InputMethodService
InputConnection ic = getCurrentInputConnection();
if (ic != null) {
rememberReplacedWord(suggestion);
- if (mSuggestionShouldReplaceCurrentWord) {
- EditingUtil.deleteWordAtCursor(ic, getWordSeparators());
- }
if (!VoiceInput.DELETE_SYMBOL.equals(suggestion)) {
ic.commitText(suggestion, 1);
}
@@ -1772,9 +1784,8 @@ public class LatinIME extends InputMethodService
}
if (!mPredicting && isCursorTouchingWord()) {
EditingUtil.Range range = new EditingUtil.Range();
- CharSequence touching =
- EditingUtil.getWordAtCursor(getCurrentInputConnection(), mWordSeparators,
- range);
+ CharSequence touching = EditingUtil.getWordAtCursor(getCurrentInputConnection(),
+ mWordSeparators, range);
if (touching != null && touching.length() > 1) {
if (mWordSeparators.indexOf(touching.charAt(touching.length() - 1)) > 0) {
touching = touching.toString().substring(0, touching.length() - 1);
@@ -1835,7 +1846,7 @@ public class LatinIME extends InputMethodService
foundWord);
showCorrections(alternatives);
if (foundWord != null) {
- mWord = foundWord;
+ mWord = new WordComposer(foundWord);
} else {
mWord.reset();
}
@@ -1868,6 +1879,7 @@ public class LatinIME extends InputMethodService
private void underlineWord(CharSequence word, int left, int right) {
InputConnection ic = getCurrentInputConnection();
if (ic == null) return;
+ ic.finishComposingText();
ic.deleteSurroundingText(left, right);
ic.setComposingText(word, 1);
ic.setSelection(mLastSelectionStart, mLastSelectionStart);
@@ -1912,7 +1924,6 @@ public class LatinIME extends InputMethodService
if (!mPredicting && length > 0) {
final InputConnection ic = getCurrentInputConnection();
mPredicting = true;
- ic.beginBatchEdit();
mJustRevertedSeparator = ic.getTextBeforeCursor(1, 0);
if (deleteChar) ic.deleteSurroundingText(1, 0);
int toDelete = mCommittedLength;
@@ -1924,7 +1935,6 @@ public class LatinIME extends InputMethodService
ic.deleteSurroundingText(toDelete, 0);
ic.setComposingText(mComposing, 1);
TextEntryState.backspace();
- ic.endBatchEdit();
postUpdateSuggestions();
} else {
sendDownUpKeyEvents(KeyEvent.KEYCODE_DEL);
@@ -2139,6 +2149,8 @@ public class LatinIME extends InputMethodService
mCorrectionMode = (mAutoCorrectOn && mAutoCorrectEnabled)
? Suggest.CORRECTION_FULL
: (mAutoCorrectOn ? Suggest.CORRECTION_BASIC : Suggest.CORRECTION_NONE);
+ mCorrectionMode = (mBigramSuggestionEnabled && mAutoCorrectOn && mAutoCorrectEnabled)
+ ? Suggest.CORRECTION_FULL_BIGRAM : mCorrectionMode;
if (mSuggest != null) {
mSuggest.setCorrectionMode(mCorrectionMode);
}
@@ -2205,6 +2217,7 @@ public class LatinIME extends InputMethodService
}
mAutoCorrectEnabled = sp.getBoolean(PREF_AUTO_COMPLETE,
mResources.getBoolean(R.bool.enable_autocorrect)) & mShowSuggestions;
+ mBigramSuggestionEnabled = sp.getBoolean(PREF_BIGRAM_SUGGESTIONS, true) & mShowSuggestions;
updateCorrectionMode();
updateAutoTextEnabled(mResources.getConfiguration().locale);
mLanguageSwitcher.loadLocales(sp);
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index e7b9e5d69..6705e9a36 100755
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -37,6 +37,21 @@ public class Suggest implements Dictionary.WordCallback {
public static final int CORRECTION_NONE = 0;
public static final int CORRECTION_BASIC = 1;
public static final int CORRECTION_FULL = 2;
+ public static final int CORRECTION_FULL_BIGRAM = 3;
+
+ /**
+ * Words that appear in both bigram and unigram data gets multiplier ranging from
+ * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from
+ * bigram data.
+ */
+ public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
+ public static final double BIGRAM_MULTIPLIER_MAX = 1.5;
+
+ /**
+ * Maximum possible bigram frequency. Will depend on how many bits are being used in data
+ * structure. Maximum bigram freqeuncy will get the BIGRAM_MULTIPLIER_MAX as the multiplier.
+ */
+ public static final int MAXIMUM_BIGRAM_FREQUENCY = 127;
public static final int DIC_USER_TYPED = 0;
public static final int DIC_MAIN = 1;
@@ -57,10 +72,13 @@ public class Suggest implements Dictionary.WordCallback {
private Dictionary mContactsDictionary;
private int mPrefMaxSuggestions = 12;
+ private int mPrefMaxBigrams = 255;
private boolean mAutoTextEnabled;
private int[] mPriorities = new int[mPrefMaxSuggestions];
+ private int[] mBigramPriorities = new int[mPrefMaxBigrams];
+
// Handle predictive correction for only the first 1280 characters for performance reasons
// If we support scripts that need latin characters beyond that, we should probably use some
// kind of a sparse array or language specific list with a mapping lookup table.
@@ -68,6 +86,7 @@ public class Suggest implements Dictionary.WordCallback {
// latin characters.
private int[] mNextLettersFrequencies = new int[1280];
private ArrayList mSuggestions = new ArrayList();
+ private ArrayList mBigramSuggestions = new ArrayList();
private ArrayList mStringPool = new ArrayList();
private boolean mHaveCorrection;
private CharSequence mOriginalWord;
@@ -88,7 +107,7 @@ public class Suggest implements Dictionary.WordCallback {
private void initPool() {
for (int i = 0; i < mPrefMaxSuggestions; i++) {
- StringBuilder sb = new StringBuilder(32);
+ StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
mStringPool.add(sb);
}
}
@@ -140,9 +159,10 @@ public class Suggest implements Dictionary.WordCallback {
}
mPrefMaxSuggestions = maxSuggestions;
mPriorities = new int[mPrefMaxSuggestions];
- collectGarbage();
+ mBigramPriorities = new int[mPrefMaxBigrams];
+ collectGarbage(mSuggestions, mPrefMaxSuggestions);
while (mStringPool.size() < mPrefMaxSuggestions) {
- StringBuilder sb = new StringBuilder(32);
+ StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
mStringPool.add(sb);
}
}
@@ -177,18 +197,17 @@ public class Suggest implements Dictionary.WordCallback {
/**
* Returns a list of words that match the list of character codes passed in.
* This list will be overwritten the next time this function is called.
- * @param a view for retrieving the context for AutoText
- * @param codes the list of codes. Each list item contains an array of character codes
- * in order of probability where the character at index 0 in the array has the highest
- * probability.
+ * @param view a view for retrieving the context for AutoText
+ * @param wordComposer contains what is currently being typed
+ * @param prevWordForBigram previous word (used only for bigram)
* @return list of suggestions.
*/
public List getSuggestions(View view, WordComposer wordComposer,
- boolean includeTypedWordIfValid) {
+ boolean includeTypedWordIfValid, CharSequence prevWordForBigram) {
LatinImeLogger.onStartSuggestion();
mHaveCorrection = false;
mCapitalize = wordComposer.isCapitalized();
- collectGarbage();
+ collectGarbage(mSuggestions, mPrefMaxSuggestions);
Arrays.fill(mPriorities, 0);
Arrays.fill(mNextLettersFrequencies, 0);
@@ -203,7 +222,38 @@ public class Suggest implements Dictionary.WordCallback {
}
// Search the dictionary only if there are at least 2 characters
- if (wordComposer.size() > 1) {
+ if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM
+ || mCorrectionMode == CORRECTION_BASIC)) {
+ // At first character, just get the bigrams
+ Arrays.fill(mBigramPriorities, 0);
+ collectGarbage(mBigramSuggestions, mPrefMaxBigrams);
+
+ if (!TextUtils.isEmpty(prevWordForBigram)) {
+ CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
+ if (mMainDict.isValidWord(lowerPrevWord)) {
+ prevWordForBigram = lowerPrevWord;
+ }
+ mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
+ mNextLettersFrequencies);
+ char currentChar = wordComposer.getTypedWord().charAt(0);
+ int count = 0;
+ int bigramSuggestionSize = mBigramSuggestions.size();
+ for (int i = 0; i < bigramSuggestionSize; i++) {
+ if (mBigramSuggestions.get(i).charAt(0) == currentChar) {
+ int poolSize = mStringPool.size();
+ StringBuilder sb = poolSize > 0 ?
+ (StringBuilder) mStringPool.remove(poolSize - 1)
+ : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+ sb.setLength(0);
+ sb.append(mBigramSuggestions.get(i));
+ mSuggestions.add(count++, sb);
+ if (count > mPrefMaxSuggestions) break;
+ }
+ }
+ }
+
+ } else if (wordComposer.size() > 1) {
+ // Search the dictionary only if there are at least 2 characters
if (mUserDictionary != null || mContactsDictionary != null) {
if (mUserDictionary != null) {
mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
@@ -213,21 +263,26 @@ public class Suggest implements Dictionary.WordCallback {
}
if (mSuggestions.size() > 0 && isValidWord(mOriginalWord)
- && mCorrectionMode == CORRECTION_FULL) {
+ && (mCorrectionMode == CORRECTION_FULL
+ || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
mHaveCorrection = true;
}
}
mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
- if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) {
+ if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM)
+ && mSuggestions.size() > 0) {
mHaveCorrection = true;
}
}
+
if (mOriginalWord != null) {
mSuggestions.add(0, mOriginalWord.toString());
}
-
+
// Check if the first suggestion has a minimum number of characters in common
- if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) {
+ if (wordComposer.size() > 1 && mSuggestions.size() > 1
+ && (mCorrectionMode == CORRECTION_FULL
+ || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
mHaveCorrection = false;
}
@@ -258,7 +313,6 @@ public class Suggest implements Dictionary.WordCallback {
i++;
}
}
-
removeDupes();
return mSuggestions;
}
@@ -312,21 +366,50 @@ public class Suggest implements Dictionary.WordCallback {
return false;
}
- public boolean addWord(final char[] word, final int offset, final int length,
- final int freq, final int dicTypeId) {
+ public boolean addWord(final char[] word, final int offset, final int length, int freq,
+ final int dicTypeId, final Dictionary.DataType dataType) {
+ ArrayList suggestions;
+ int[] priorities;
+ int prefMaxSuggestions;
+ if(dataType == Dictionary.DataType.BIGRAM) {
+ suggestions = mBigramSuggestions;
+ priorities = mBigramPriorities;
+ prefMaxSuggestions = mPrefMaxBigrams;
+ } else {
+ suggestions = mSuggestions;
+ priorities = mPriorities;
+ prefMaxSuggestions = mPrefMaxSuggestions;
+ }
+
int pos = 0;
- final int[] priorities = mPriorities;
- final int prefMaxSuggestions = mPrefMaxSuggestions;
+
// Check if it's the same word, only caps are different
if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
pos = 0;
} else {
+ if (dataType == Dictionary.DataType.UNIGRAM) {
+ // Check if the word was already added before (by bigram data)
+ int bigramSuggestion = searchBigramSuggestion(word,offset,length);
+ if(bigramSuggestion >= 0) {
+ // turn freq from bigram into multiplier specified above
+ double multiplier = (((double) mBigramPriorities[bigramSuggestion])
+ / MAXIMUM_BIGRAM_FREQUENCY)
+ * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
+ + BIGRAM_MULTIPLIER_MIN;
+ /* Log.d("Suggest","bigram num: " + bigramSuggestion
+ + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
+ + " currentPriority: " + freq + " bigramPriority: "
+ + mBigramPriorities[bigramSuggestion]
+ + " multiplier: " + multiplier); */
+ freq = (int)Math.round((freq * multiplier));
+ }
+ }
+
// Check the last one's priority and bail
if (priorities[prefMaxSuggestions - 1] >= freq) return true;
while (pos < prefMaxSuggestions) {
if (priorities[pos] < freq
- || (priorities[pos] == freq && length < mSuggestions
- .get(pos).length())) {
+ || (priorities[pos] == freq && length < suggestions.get(pos).length())) {
break;
}
pos++;
@@ -336,12 +419,13 @@ public class Suggest implements Dictionary.WordCallback {
if (pos >= prefMaxSuggestions) {
return true;
}
+
System.arraycopy(priorities, pos, priorities, pos + 1,
prefMaxSuggestions - pos - 1);
priorities[pos] = freq;
int poolSize = mStringPool.size();
StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
- : new StringBuilder(32);
+ : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
sb.setLength(0);
if (mCapitalize) {
sb.append(Character.toUpperCase(word[offset]));
@@ -351,9 +435,9 @@ public class Suggest implements Dictionary.WordCallback {
} else {
sb.append(word, offset, length);
}
- mSuggestions.add(pos, sb);
- if (mSuggestions.size() > prefMaxSuggestions) {
- CharSequence garbage = mSuggestions.remove(prefMaxSuggestions);
+ suggestions.add(pos, sb);
+ if (suggestions.size() > prefMaxSuggestions) {
+ CharSequence garbage = suggestions.remove(prefMaxSuggestions);
if (garbage instanceof StringBuilder) {
mStringPool.add(garbage);
}
@@ -363,6 +447,26 @@ public class Suggest implements Dictionary.WordCallback {
return true;
}
+ private int searchBigramSuggestion(final char[] word, final int offset, final int length) {
+ // TODO This is almost O(n^2). Might need fix.
+ // search whether the word appeared in bigram data
+ int bigramSuggestSize = mBigramSuggestions.size();
+ for(int i = 0; i < bigramSuggestSize; i++) {
+ if(mBigramSuggestions.get(i).length() == length) {
+ boolean chk = true;
+ for(int j = 0; j < length; j++) {
+ if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) {
+ chk = false;
+ break;
+ }
+ }
+ if(chk) return i;
+ }
+ }
+
+ return -1;
+ }
+
public boolean isValidWord(final CharSequence word) {
if (word == null || word.length() == 0) {
return false;
@@ -373,21 +477,21 @@ public class Suggest implements Dictionary.WordCallback {
|| (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
}
- private void collectGarbage() {
+ private void collectGarbage(ArrayList suggestions, int prefMaxSuggestions) {
int poolSize = mStringPool.size();
- int garbageSize = mSuggestions.size();
- while (poolSize < mPrefMaxSuggestions && garbageSize > 0) {
- CharSequence garbage = mSuggestions.get(garbageSize - 1);
+ int garbageSize = suggestions.size();
+ while (poolSize < prefMaxSuggestions && garbageSize > 0) {
+ CharSequence garbage = suggestions.get(garbageSize - 1);
if (garbage != null && garbage instanceof StringBuilder) {
mStringPool.add(garbage);
poolSize++;
}
garbageSize--;
}
- if (poolSize == mPrefMaxSuggestions + 1) {
+ if (poolSize == prefMaxSuggestions + 1) {
Log.w("Suggest", "String pool got too big: " + poolSize);
}
- mSuggestions.clear();
+ suggestions.clear();
}
public void close() {
diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java
index e2573a0a9..1ea74847a 100644
--- a/java/src/com/android/inputmethod/latin/WordComposer.java
+++ b/java/src/com/android/inputmethod/latin/WordComposer.java
@@ -55,7 +55,9 @@ public class WordComposer {
mTypedWord = new StringBuilder(copy.mTypedWord);
mCapsCount = copy.mCapsCount;
mAutoCapitalized = copy.mAutoCapitalized;
+ mIsCapitalized = copy.mIsCapitalized;
}
+
/**
* Clear out the keys registered so far.
*/
diff --git a/native/Android.mk b/native/Android.mk
index 57551097b..6492de6bb 100644
--- a/native/Android.mk
+++ b/native/Android.mk
@@ -8,8 +8,8 @@ LOCAL_SRC_FILES := \
src/dictionary.cpp \
src/char_utils.cpp
-LOCAL_C_INCLUDES += \
- $(JNI_H_INCLUDE)
+LOCAL_NDK_VERSION := 4
+LOCAL_SDK_VERSION := 8
LOCAL_PRELINK_MODULE := false
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index bb45cb538..4fe80da69 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -59,8 +59,7 @@ static int latinime_BinaryDictionary_getSuggestions(
jint maxAlternatives, jint skipPos, jintArray nextLettersArray, jint nextLettersSize)
{
Dictionary *dictionary = (Dictionary*) dict;
- if (dictionary == NULL)
- return 0;
+ if (dictionary == NULL) return 0;
int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
@@ -81,6 +80,28 @@ static int latinime_BinaryDictionary_getSuggestions(
return count;
}
+static int latinime_BinaryDictionary_getBigrams
+ (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength,
+ jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams)
+{
+ Dictionary *dictionary = (Dictionary*) dict;
+ if (dictionary == NULL) return 0;
+
+ jchar *word = env->GetCharArrayElements(wordArray, NULL);
+ jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
+ int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
+
+ int count = dictionary->getBigrams((unsigned short*) word, wordLength,
+ (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
+
+ env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
+ env->ReleaseCharArrayElements(outputArray, outputChars, 0);
+ env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
+
+ return count;
+}
+
+
static jboolean latinime_BinaryDictionary_isValidWord
(JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength)
{
@@ -108,7 +129,8 @@ static JNINativeMethod gMethods[] = {
(void*)latinime_BinaryDictionary_open},
{"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(I[II[C[IIIII[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
- {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}
+ {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
+ {"getBigramsNative", "(I[CI[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}
};
static int registerNativeMethods(JNIEnv* env, const char* className,
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index e75beb5b7..a1a632faa 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -19,6 +19,7 @@
#include
#include
#include
+//#define LOG_TAG "dictionary.cpp"
//#include
#define LOGI
@@ -27,6 +28,9 @@
#include "char_utils.h"
#define DEBUG_DICT 0
+#define DICTIONARY_VERSION_MIN 200
+#define DICTIONARY_HEADER_SIZE 2
+#define NOT_VALID_WORD -99
namespace latinime {
@@ -35,6 +39,7 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
mDict = (unsigned char*) dict;
mTypedLetterMultiplier = typedLetterMultiplier;
mFullWordMultiplier = fullWordMultiplier;
+ getVersionNumber();
}
Dictionary::~Dictionary()
@@ -58,7 +63,11 @@ int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWor
mNextLettersFrequencies = nextLetters;
mNextLettersSize = nextLettersSize;
- getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
+ if (checkIfDictVersionIsLatest()) {
+ getWordsRec(DICTIONARY_HEADER_SIZE, 0, mInputLength * 3, false, 1, 0, 0);
+ } else {
+ getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
+ }
// Get the word count
suggWords = 0;
@@ -85,6 +94,21 @@ Dictionary::registerNextLetter(unsigned short c)
}
}
+void
+Dictionary::getVersionNumber()
+{
+ mVersion = (mDict[0] & 0xFF);
+ mBigram = (mDict[1] & 0xFF);
+ LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram);
+}
+
+// Checks whether it has the latest dictionary or the old dictionary
+bool
+Dictionary::checkIfDictVersionIsLatest()
+{
+ return (mVersion >= DICTIONARY_VERSION_MIN) && (mBigram == 1 || mBigram == 0);
+}
+
unsigned short
Dictionary::getChar(int *pos)
{
@@ -112,6 +136,28 @@ Dictionary::getAddress(int *pos)
return address;
}
+int
+Dictionary::getFreq(int *pos)
+{
+ int freq = mDict[(*pos)++] & 0xFF;
+
+ if (checkIfDictVersionIsLatest()) {
+ // skipping bigram
+ int bigramExist = (mDict[*pos] & FLAG_BIGRAM_READ);
+ if (bigramExist > 0) {
+ int nextBigramExist = 1;
+ while (nextBigramExist > 0) {
+ (*pos) += 3;
+ nextBigramExist = (mDict[(*pos)++] & FLAG_BIGRAM_CONTINUED);
+ }
+ } else {
+ (*pos)++;
+ }
+ }
+
+ return freq;
+}
+
int
Dictionary::wideStrLen(unsigned short *str)
{
@@ -161,6 +207,46 @@ Dictionary::addWord(unsigned short *word, int length, int frequency)
return false;
}
+bool
+Dictionary::addWordBigram(unsigned short *word, int length, int frequency)
+{
+ word[length] = 0;
+ if (DEBUG_DICT) {
+ char s[length + 1];
+ for (int i = 0; i <= length; i++) s[i] = word[i];
+ LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency);
+ }
+
+ // Find the right insertion point
+ int insertAt = 0;
+ while (insertAt < mMaxBigrams) {
+ if (frequency > mBigramFreq[insertAt]
+ || (mBigramFreq[insertAt] == frequency
+ && length < wideStrLen(mBigramChars + insertAt * mMaxWordLength))) {
+ break;
+ }
+ insertAt++;
+ }
+ LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
+ if (insertAt < mMaxBigrams) {
+ memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
+ (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
+ (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
+ mBigramFreq[insertAt] = frequency;
+ memmove((char*) mBigramChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
+ (char*) mBigramChars + (insertAt ) * mMaxWordLength * sizeof(short),
+ (mMaxBigrams - insertAt - 1) * sizeof(short) * mMaxWordLength);
+ unsigned short *dest = mBigramChars + (insertAt ) * mMaxWordLength;
+ while (length--) {
+ *dest++ = *word++;
+ }
+ *dest = 0; // NULL terminate
+ if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt);
+ return true;
+ }
+ return false;
+}
+
unsigned short
Dictionary::toLowerCase(unsigned short c) {
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
@@ -213,12 +299,17 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
}
for (int i = 0; i < count; i++) {
+ // -- at char
unsigned short c = getChar(&pos);
+ // -- at flag/add
unsigned short lowerC = toLowerCase(c);
bool terminal = getTerminal(&pos);
int childrenAddress = getAddress(&pos);
+ // -- after address or flag
int freq = 1;
if (terminal) freq = getFreq(&pos);
+ // -- after add or freq
+
// If we are only doing completions, no need to look at the typed characters.
if (completion) {
mWord[depth] = c;
@@ -232,7 +323,7 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
getWordsRec(childrenAddress, depth + 1, maxDepth,
completion, snr, inputIndex, diffs);
}
- } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
+ } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) {
// Skip the ' or other letter and continue deeper
mWord[depth] = c;
if (childrenAddress != 0) {
@@ -270,14 +361,185 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
}
}
-bool
-Dictionary::isValidWord(unsigned short *word, int length)
+int
+Dictionary::getBigramAddress(int *pos, bool advance)
{
- return isValidWordRec(0, word, 0, length);
+ int address = 0;
+
+ address += (mDict[*pos] & 0x3F) << 16;
+ address += (mDict[*pos + 1] & 0xFF) << 8;
+ address += (mDict[*pos + 2] & 0xFF);
+
+ if (advance) {
+ *pos += 3;
+ }
+
+ return address;
+}
+
+int
+Dictionary::getBigramFreq(int *pos)
+{
+ int freq = mDict[(*pos)++] & FLAG_BIGRAM_FREQ;
+
+ return freq;
+}
+
+
+int
+Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, unsigned short *bigramChars,
+ int *bigramFreq, int maxWordLength, int maxBigrams)
+{
+ mBigramFreq = bigramFreq;
+ mBigramChars = bigramChars;
+ mMaxWordLength = maxWordLength;
+ mMaxBigrams = maxBigrams;
+
+ if (mBigram == 1 && checkIfDictVersionIsLatest()) {
+ int pos = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
+ LOGI("Pos -> %d\n", pos);
+ if (pos < 0) {
+ return 0;
+ }
+
+ int bigramCount = 0;
+ int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
+ if (bigramExist > 0) {
+ int nextBigramExist = 1;
+ while (nextBigramExist > 0) {
+ int bigramAddress = getBigramAddress(&pos, true);
+ int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]);
+ // search for all bigrams and store them
+ searchForTerminalNode(bigramAddress, frequency);
+ nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
+ bigramCount++;
+ }
+ }
+
+ return bigramCount;
+ }
+ return 0;
+}
+
+void
+Dictionary::searchForTerminalNode(int addressLookingFor, int frequency)
+{
+ // track word with such address and store it in an array
+ unsigned short word[mMaxWordLength];
+
+ int pos;
+ int followDownBranchAddress = DICTIONARY_HEADER_SIZE;
+ bool found = false;
+ char followingChar = ' ';
+ int depth = -1;
+
+ while(!found) {
+ bool followDownAddressSearchStop = false;
+ bool firstAddress = true;
+ bool haveToSearchAll = true;
+
+ if (depth >= 0) {
+ word[depth] = (unsigned short) followingChar;
+ }
+ pos = followDownBranchAddress; // pos start at count
+ int count = mDict[pos] & 0xFF;
+ LOGI("count - %d\n",count);
+ pos++;
+ for (int i = 0; i < count; i++) {
+ // pos at data
+ pos++;
+ // pos now at flag
+ if (!getFirstBitOfByte(&pos)) { // non-terminal
+ if (!followDownAddressSearchStop) {
+ int addr = getBigramAddress(&pos, false);
+ if (addr > addressLookingFor) {
+ followDownAddressSearchStop = true;
+ if (firstAddress) {
+ firstAddress = false;
+ haveToSearchAll = true;
+ } else if (!haveToSearchAll) {
+ break;
+ }
+ } else {
+ followDownBranchAddress = addr;
+ followingChar = (char)(0xFF & mDict[pos-1]);
+ if (firstAddress) {
+ firstAddress = false;
+ haveToSearchAll = false;
+ }
+ }
+ }
+ pos += 3;
+ } else if (getFirstBitOfByte(&pos)) { // terminal
+ if (addressLookingFor == (pos-1)) { // found !!
+ depth++;
+ word[depth] = (0xFF & mDict[pos-1]);
+ found = true;
+ break;
+ }
+ if (getSecondBitOfByte(&pos)) { // address + freq (4 byte)
+ if (!followDownAddressSearchStop) {
+ int addr = getBigramAddress(&pos, false);
+ if (addr > addressLookingFor) {
+ followDownAddressSearchStop = true;
+ if (firstAddress) {
+ firstAddress = false;
+ haveToSearchAll = true;
+ } else if (!haveToSearchAll) {
+ break;
+ }
+ } else {
+ followDownBranchAddress = addr;
+ followingChar = (char)(0xFF & mDict[pos-1]);
+ if (firstAddress) {
+ firstAddress = false;
+ haveToSearchAll = true;
+ }
+ }
+ }
+ pos += 4;
+ } else { // freq only (2 byte)
+ pos += 2;
+ }
+
+ // skipping bigram
+ int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
+ if (bigramExist > 0) {
+ int nextBigramExist = 1;
+ while (nextBigramExist > 0) {
+ pos += 3;
+ nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
+ }
+ } else {
+ pos++;
+ }
+ }
+ }
+ depth++;
+ if (followDownBranchAddress == 0) {
+ LOGI("ERROR!!! Cannot find bigram!!");
+ break;
+ }
+ }
+
+ addWordBigram(word, depth, frequency);
}
bool
+Dictionary::isValidWord(unsigned short *word, int length)
+{
+ if (checkIfDictVersionIsLatest()) {
+ return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
+ } else {
+ return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
+ }
+}
+
+int
Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+ // returns address of bigram data of that word
+ // return -99 if not found
+
int count = getCount(&pos);
unsigned short currentChar = (unsigned short) word[offset];
for (int j = 0; j < count; j++) {
@@ -287,12 +549,13 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length
if (c == currentChar) {
if (offset == length - 1) {
if (terminal) {
- return true;
+ return (pos+1);
}
} else {
if (childPos != 0) {
- if (isValidWordRec(childPos, word, offset + 1, length)) {
- return true;
+ int t = isValidWordRec(childPos, word, offset + 1, length);
+ if (t > 0) {
+ return t;
}
}
}
@@ -303,7 +566,7 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length
// There could be two instances of each alphabet - upper and lower case. So continue
// looking ...
}
- return false;
+ return NOT_VALID_WORD;
}
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 3749f3d88..2c574290f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -28,12 +28,19 @@ namespace latinime {
// if the word has other endings.
#define FLAG_TERMINAL_MASK 0x80
+#define FLAG_BIGRAM_READ 0x80
+#define FLAG_BIGRAM_CHILDEXIST 0x40
+#define FLAG_BIGRAM_CONTINUED 0x80
+#define FLAG_BIGRAM_FREQ 0x7F
+
class Dictionary {
public:
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
int *nextLetters, int nextLettersSize);
+ int getBigrams(unsigned short *word, int length, unsigned short *outWords, int *frequencies,
+ int maxWordLength, int maxBigrams);
bool isValidWord(unsigned short *word, int length);
void setAsset(void *asset) { mAsset = asset; }
void *getAsset() { return mAsset; }
@@ -41,28 +48,40 @@ public:
private:
+ void getVersionNumber();
+ bool checkIfDictVersionIsLatest();
int getAddress(int *pos);
+ int getBigramAddress(int *pos, bool advance);
+ int getFreq(int *pos);
+ int getBigramFreq(int *pos);
+ void searchForTerminalNode(int address, int frequency);
+
+ bool getFirstBitOfByte(int *pos) { return (mDict[*pos] & 0x80) > 0; }
+ bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; }
bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
- int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
unsigned short getChar(int *pos);
int wideStrLen(unsigned short *str);
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency);
+ bool addWordBigram(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex, int diffs);
- bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
+ int isValidWordRec(int pos, unsigned short *word, int offset, int length);
void registerNextLetter(unsigned short c);
unsigned char *mDict;
void *mAsset;
int *mFrequencies;
+ int *mBigramFreq;
int mMaxWords;
+ int mMaxBigrams;
int mMaxWordLength;
unsigned short *mOutputChars;
+ unsigned short *mBigramChars;
int *mInputCodes;
int mInputLength;
int mMaxAlternatives;
@@ -74,6 +93,8 @@ private:
int mTypedLetterMultiplier;
int *mNextLettersFrequencies;
int mNextLettersSize;
+ int mVersion;
+ int mBigram;
};
// ----------------------------------------------------------------------------
diff --git a/tests/data/bigramlist.xml b/tests/data/bigramlist.xml
new file mode 100644
index 000000000..dd3f2916e
--- /dev/null
+++ b/tests/data/bigramlist.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/wordlist.xml b/tests/data/wordlist.xml
index 22d0caa38..b870eb2a3 100644
--- a/tests/data/wordlist.xml
+++ b/tests/data/wordlist.xml
@@ -225,6 +225,7 @@
services
niño
María
+ car
hmmm
hon
tty
diff --git a/tests/res/raw/test.dict b/tests/res/raw/test.dict
index e789aaa9a..6a5d6d794 100644
Binary files a/tests/res/raw/test.dict and b/tests/res/raw/test.dict differ
diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
index 0d3babfbc..318cc7ccd 100644
--- a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
+++ b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
@@ -71,7 +71,7 @@ public class SuggestTests extends AndroidTestCase {
Log.w(TAG, "No available size for binary dictionary");
}
mSuggest.setAutoTextEnabled(false);
- mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL);
+ mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
}
/************************** Helper functions ************************/
@@ -108,19 +108,56 @@ public class SuggestTests extends AndroidTestCase {
private boolean isDefaultSuggestion(CharSequence typed, CharSequence expected) {
WordComposer word = createWordComposer(typed);
- List suggestions = mSuggest.getSuggestions(null, word, false);
+ List suggestions = mSuggest.getSuggestions(null, word, false, null);
+ return isDefaultSuggestion(suggestions, expected);
+ }
+
+ private void getBigramSuggestions(CharSequence previous, CharSequence typed) {
+ if(!TextUtils.isEmpty(previous) && (typed.length() > 1)) {
+ WordComposer firstChar = createWordComposer(typed.charAt(0) + "");
+ mSuggest.getSuggestions(null, firstChar, false, previous);
+ }
+ }
+
+ private boolean isDefaultNextSuggestion(CharSequence previous, CharSequence typed,
+ CharSequence expected) {
+ WordComposer word = createWordComposer(typed);
+ getBigramSuggestions(previous, typed);
+ List suggestions = mSuggest.getSuggestions(null, word, false, previous);
return isDefaultSuggestion(suggestions, expected);
}
private boolean isDefaultCorrection(CharSequence typed, CharSequence expected) {
WordComposer word = createWordComposer(typed);
- List suggestions = mSuggest.getSuggestions(null, word, false);
+ List suggestions = mSuggest.getSuggestions(null, word, false, null);
+ return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
+ }
+
+ private boolean isDefaultNextCorrection(CharSequence previous, CharSequence typed,
+ CharSequence expected) {
+ WordComposer word = createWordComposer(typed);
+ getBigramSuggestions(previous, typed);
+ List suggestions = mSuggest.getSuggestions(null, word, false, previous);
+ for(int i=0;i suggestions = mSuggest.getSuggestions(null, word, false);
+ List suggestions = mSuggest.getSuggestions(null, word, false, null);
+ for (int i = 1; i < suggestions.size(); i++) {
+ if (TextUtils.equals(suggestions.get(i), expected)) return true;
+ }
+ return false;
+ }
+
+ private boolean isASuggestion(CharSequence previous, CharSequence typed,
+ CharSequence expected) {
+ WordComposer word = createWordComposer(typed);
+ getBigramSuggestions(previous, typed);
+ List suggestions = mSuggest.getSuggestions(null, word, false, previous);
for (int i = 1; i < suggestions.size(); i++) {
if (TextUtils.equals(suggestions.get(i), expected)) return true;
}
@@ -248,4 +285,26 @@ public class SuggestTests extends AndroidTestCase {
// Mara
assertTrue(isDefaultCorrection("maria", "Mar\u00EDa"));
}
+
+ /**
+ * Make sure bigrams are showing when first character is typed
+ * and don't show any when there aren't any
+ */
+ public void testBigramsAtFirstChar() {
+ assertTrue(isDefaultNextCorrection("about", "p", "part"));
+ assertTrue(isDefaultNextCorrection("I'm", "a", "about"));
+ assertTrue(isDefaultNextCorrection("about", "b", "business"));
+ assertTrue(isASuggestion("about", "b", "being"));
+ assertFalse(isDefaultNextSuggestion("about", "p", "business"));
+ }
+
+ /**
+ * Make sure bigrams score affects the original score
+ */
+ public void testBigramsScoreEffect() {
+ assertTrue(isDefaultCorrection("pa", "page"));
+ assertTrue(isDefaultNextCorrection("about", "pa", "part"));
+ assertTrue(isDefaultCorrection("sa", "said"));
+ assertTrue(isDefaultNextCorrection("from", "sa", "same"));
+ }
}