From 937d5ad0131267aa4273f3e5d75b203a1f263c18 Mon Sep 17 00:00:00 2001
From: Jae Yong Sung <jysung@google.com>
Date: Wed, 30 Jun 2010 20:28:04 -0700
Subject: [PATCH] added bigram prediction   - after first character, only
 suggests bigram data (but doesn't autocomplete)   - after second character,
 words from dictionary gets rearranged by using bigram   - compatible with old
 dictionary   - added preference option to disable bigram

Change-Id: Ia8f4e8fa55e797e86d858fd499887cd396388411
---
 java/res/values/strings.xml                   |   5 +
 java/res/xml/prefs.xml                        |  10 +-
 .../inputmethod/latin/BinaryDictionary.java   |  34 ++-
 .../android/inputmethod/latin/Dictionary.java |  29 +-
 .../inputmethod/latin/EditingUtil.java        |  16 +
 .../latin/ExpandableDictionary.java           |   5 +-
 .../android/inputmethod/latin/LatinIME.java   |  16 +-
 .../android/inputmethod/latin/Suggest.java    | 167 +++++++++--
 ...oid_inputmethod_latin_BinaryDictionary.cpp |  28 +-
 native/src/dictionary.cpp                     | 281 +++++++++++++++++-
 native/src/dictionary.h                       |  25 +-
 tests/data/bigramlist.xml                     |  36 +++
 tests/data/wordlist.xml                       |   1 +
 tests/res/raw/test.dict                       | Bin 2562 -> 2829 bytes
 .../inputmethod/latin/tests/SuggestTests.java |  73 ++++-
 15 files changed, 663 insertions(+), 63 deletions(-)
 create mode 100644 tests/data/bigramlist.xml
diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml
index 35dd3e089..70a5b7e2e 100644
--- a/java/res/values/strings.xml
+++ b/java/res/values/strings.xml
@@ -85,6 +85,11 @@
     <!-- Description for auto completion -->
     <string name="auto_complete_summary">Spacebar and punctuation automatically insert highlighted word</string>
     
+    <!-- Option to enable bigram completion -->
+    <string name="bigram_suggestion">Bigram Suggestions</string>
+    <!-- Description for bigram suggestions -->
+    <string name="bigram_suggestion_summary">Use previous word to improve suggestion</string>
+
     <!-- Array of prediction modes -->
     <string-array name="prediction_modes">
         <item>None</item>
diff --git a/java/res/xml/prefs.xml b/java/res/xml/prefs.xml
index 535b63f3b..c93fe0ac3 100644
--- a/java/res/xml/prefs.xml
+++ b/java/res/xml/prefs.xml
@@ -81,6 +81,14 @@
             android:defaultValue="@bool/enable_autocorrect"
             android:dependency="show_suggestions"
             />
-            
+
+        <CheckBoxPreference
+            android:key="bigram_suggestion"
+            android:title="@string/bigram_suggestion"
+            android:summary="@string/bigram_suggestion_summary"
+            android:persistent="true"
+            android:defaultValue="true"
+            android:dependency="auto_complete"
+            />
     </PreferenceCategory>            
 </PreferenceScreen>
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 6473f4558..8d2363012 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -33,9 +33,9 @@ import android.util.Log;
 public class BinaryDictionary extends Dictionary {
 
     private static final String TAG = "BinaryDictionary";
-    public static final int MAX_WORD_LENGTH = 48;
     private static final int MAX_ALTERNATIVES = 16;
     private static final int MAX_WORDS = 16;
+    private static final int MAX_BIGRAMS = 255; // TODO Probably don't need all 255
 
     private static final int TYPED_LETTER_MULTIPLIER = 2;
     private static final boolean ENABLE_MISSED_CHARACTERS = true;
@@ -44,7 +44,9 @@ public class BinaryDictionary extends Dictionary {
     private int mDictLength;
     private int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
     private char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
+    private char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
     private int[] mFrequencies = new int[MAX_WORDS];
+    private int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
     // Keep a reference to the native dict direct buffer in Java to avoid
     // unexpected deallocation of the direct buffer.
     private ByteBuffer mNativeDictDirectBuffer;
@@ -71,7 +73,7 @@ public class BinaryDictionary extends Dictionary {
     /**
      * Create a dictionary from a byte buffer. This is used for testing.
      * @param context application context for reading resources
-     * @param resId the resource containing the raw binary dictionary
+     * @param byteBuffer a ByteBuffer containing the binary dictionary
      */
     public BinaryDictionary(Context context, ByteBuffer byteBuffer) {
         if (byteBuffer != null) {
@@ -95,6 +97,8 @@ public class BinaryDictionary extends Dictionary {
             char[] outputChars, int[] frequencies,
             int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
             int[] nextLettersFrequencies, int nextLettersSize);
+    private native int getBigramsNative(int nativeData, char[] prevWord, int prevWordLength,
+            char[] outputChars, int[] frequencies, int maxWordLength, int maxBigrams);
 
     private final void loadDictionary(Context context, int resId) {
         InputStream is = context.getResources().openRawResource(resId);
@@ -121,6 +125,30 @@ public class BinaryDictionary extends Dictionary {
         }
     }
 
+    @Override
+    public void getBigrams(final WordComposer composer, final CharSequence previousWord,
+            final WordCallback callback, int[] nextLettersFrequencies) {
+        // Reports the words getBigramsNative finds for previousWord; composer is unused here.
+        char[] chars = previousWord.toString().toCharArray();
+        Arrays.fill(mOutputChars_bigrams, (char) 0);
+        Arrays.fill(mFrequencies_bigrams, 0);
+
+        int count = getBigramsNative(mNativeDict, chars, chars.length, mOutputChars_bigrams,
+                mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS);
+        for (int j = 0; j < count; j++) {
+            if (mFrequencies_bigrams[j] < 1) break;
+            int start = j * MAX_WORD_LENGTH;
+            int len = 0; // scan is bounded so a slot without a terminator cannot overrun
+            while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) {
+                len++;
+            }
+            if (len > 0 && !callback.addWord(mOutputChars_bigrams, start, len,
+                    mFrequencies_bigrams[j], DataType.BIGRAM)) {
+                break; // the callback reported that no more words are required
+            }
+        }
+    }
+
     @Override
     public void getWords(final WordComposer codes, final WordCallback callback,
             int[] nextLettersFrequencies) {
@@ -166,7 +194,7 @@ public class BinaryDictionary extends Dictionary {
                 len++;
             }
             if (len > 0) {
-                callback.addWord(mOutputChars, start, len, mFrequencies[j]);
+                callback.addWord(mOutputChars, start, len, mFrequencies[j], DataType.UNIGRAM);
             }
         }
     }
diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java
index e7b526663..54317c861 100644
--- a/java/src/com/android/inputmethod/latin/Dictionary.java
+++ b/java/src/com/android/inputmethod/latin/Dictionary.java
@@ -21,7 +21,9 @@ package com.android.inputmethod.latin;
  * strokes.
  */
 abstract public class Dictionary {
-    
+
+    protected static final int MAX_WORD_LENGTH = 48;
+
     /**
      * Whether or not to replicate the typed word in the suggested list, even if it's valid.
      */
@@ -31,7 +33,11 @@ abstract public class Dictionary {
      * The weight to give to a word if it's length is the same as the number of typed characters.
      */
     protected static final int FULL_WORD_FREQ_MULTIPLIER = 2;
-    
+
+    public static enum DataType {
+        UNIGRAM, BIGRAM
+    }
+
     /**
      * Interface to be implemented by classes requesting words to be fetched from the dictionary.
      * @see #getWords(WordComposer, WordCallback)
@@ -45,9 +51,11 @@ abstract public class Dictionary {
          * @param wordLength length of valid characters in the character array
          * @param frequency the frequency of occurence. This is normalized between 1 and 255, but
          * can exceed those limits
+         * @param dataType whether the word comes from unigram or bigram data
          * @return true if the word was added, false if no more words are required
          */
-        boolean addWord(char[] word, int wordOffset, int wordLength, int frequency);
+        boolean addWord(char[] word, int wordOffset, int wordLength, int frequency,
+                DataType dataType);
     }
 
     /**
@@ -64,6 +72,21 @@ abstract public class Dictionary {
     abstract public void getWords(final WordComposer composer, final WordCallback callback,
             int[] nextLettersFrequencies);
 
+    /**
+     * Looks up the words that may follow the given previous word in the bigram data and reports
+     * each of them through the callback object. This base implementation does nothing.
+     * @param composer the key sequence to match
+     * @param previousWord the word preceding the one currently being composed
+     * @param callback the callback object to send possible word following previous word
+     * @param nextLettersFrequencies array of frequencies of next letters that could follow the
+     *        word so far (e.g. "bracke" can be followed by "t", so array['t'] is non-zero on
+     *        return). Pass in null if you don't want the dictionary to look up next letters.
+     */
+    public void getBigrams(final WordComposer composer, final CharSequence previousWord,
+            final WordCallback callback, int[] nextLettersFrequencies) {
+        // empty base implementation
+    }
+
     /**
      * Checks if the given word occurs in the dictionary
      * @param word the word to search for. The search should be case-insensitive.
diff --git a/java/src/com/android/inputmethod/latin/EditingUtil.java b/java/src/com/android/inputmethod/latin/EditingUtil.java
index 7571f1daf..5133c60ca 100644
--- a/java/src/com/android/inputmethod/latin/EditingUtil.java
+++ b/java/src/com/android/inputmethod/latin/EditingUtil.java
@@ -16,6 +16,8 @@
 
 package com.android.inputmethod.latin;
 
+import java.util.regex.Pattern;
+
 import android.view.inputmethod.ExtractedText;
 import android.view.inputmethod.ExtractedTextRequest;
 import android.view.inputmethod.InputConnection;
@@ -24,6 +26,11 @@ import android.view.inputmethod.InputConnection;
  * Utility methods to deal with editing text through an InputConnection.
  */
 public class EditingUtil {
+    /**
+     * Number of characters we want to look back in order to identify the previous word
+     */
+    public static final int LOOKBACK_CHARACTER_NUM = 15;
+
     private EditingUtil() {};
 
     /**
@@ -175,4 +182,13 @@ public class EditingUtil {
     private static boolean isWhitespace(int code, String whitespace) {
         return whitespace.contains(String.valueOf((char) code));
     }
+
+    private static final Pattern spaceRegex = Pattern.compile("\\s+");
+    /** Returns the token before the word being typed, or null. TODO: This could be slow! */
+    public static CharSequence getPreviousWord(InputConnection connection) {
+        if (connection == null) return null; // no input connection to read from
+        CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
+        String[] w = (prev == null) ? new String[0] : spaceRegex.split(prev);
+        return (w.length >= 2) ? w[w.length-2] : null;
+    }
 }
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index 46bc41c42..6f4d925ee 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -267,7 +267,7 @@ public class ExpandableDictionary extends Dictionary {
             if (completion) {
                 word[depth] = c;
                 if (terminal) {
-                    if (!callback.addWord(word, 0, depth + 1, freq * snr)) {
+                    if (!callback.addWord(word, 0, depth + 1, freq * snr, DataType.UNIGRAM)) {
                         return;
                     }
                     // Add to frequency of next letters for predictive correction
@@ -305,7 +305,8 @@ public class ExpandableDictionary extends Dictionary {
                                         || !same(word, depth + 1, codes.getTypedWord())) {
                                     int finalFreq = freq * snr * addedAttenuation;
                                     if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
-                                    callback.addWord(word, 0, depth + 1, finalFreq);
+                                    callback.addWord(word, 0, depth + 1, finalFreq,
+                                            DataType.UNIGRAM);
                                 }
                             }
                             if (children != null) {
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index b1b6d9228..51fb9d876 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -89,6 +89,7 @@ public class LatinIME extends InputMethodService
     private static final String PREF_QUICK_FIXES = "quick_fixes";
     private static final String PREF_SHOW_SUGGESTIONS = "show_suggestions";
     private static final String PREF_AUTO_COMPLETE = "auto_complete";
+    private static final String PREF_BIGRAM_SUGGESTIONS = "bigram_suggestion";
     private static final String PREF_VOICE_MODE = "voice_mode";
 
     // Whether or not the user has used voice input before (and thus, whether to show the
@@ -187,6 +188,7 @@ public class LatinIME extends InputMethodService
     private boolean mAutoSpace;
     private boolean mJustAddedAutoSpace;
     private boolean mAutoCorrectEnabled;
+    private boolean mBigramSuggestionEnabled;
     private boolean mAutoCorrectOn;
     private boolean mCapsLock;
     private boolean mPasswordText;
@@ -1538,7 +1540,7 @@ public class LatinIME extends InputMethodService
     }
 
     private List<CharSequence> getTypedSuggestions(WordComposer word) {
-        List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false);
+        List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false, null);
         return stringList;
     }
 
@@ -1549,7 +1551,14 @@ public class LatinIME extends InputMethodService
     }
 
     private void showSuggestions(WordComposer word) {
-        List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false);
+        //long startTime = System.currentTimeMillis(); // TIME MEASUREMENT!
+        // TODO Maybe need better way of retrieving previous word
+        CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
+        List<CharSequence> stringList = mSuggest.getSuggestions(mInputView, word, false,
+                prevWord);
+        //long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT!
+        //Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime));
+
         int[] nextLettersFrequencies = mSuggest.getNextLettersFrequencies();
 
         ((LatinKeyboard) mInputView.getKeyboard()).setPreferredLetters(nextLettersFrequencies);
@@ -2088,6 +2097,8 @@ public class LatinIME extends InputMethodService
         mCorrectionMode = (mAutoCorrectOn && mAutoCorrectEnabled)
                 ? Suggest.CORRECTION_FULL
                 : (mAutoCorrectOn ? Suggest.CORRECTION_BASIC : Suggest.CORRECTION_NONE);
+        mCorrectionMode = (mBigramSuggestionEnabled && mAutoCorrectOn && mAutoCorrectEnabled)
+                ? Suggest.CORRECTION_FULL_BIGRAM : mCorrectionMode;
         if (mSuggest != null) {
             mSuggest.setCorrectionMode(mCorrectionMode);
         }
@@ -2154,6 +2165,7 @@ public class LatinIME extends InputMethodService
         }
         mAutoCorrectEnabled = sp.getBoolean(PREF_AUTO_COMPLETE,
                 mResources.getBoolean(R.bool.enable_autocorrect)) & mShowSuggestions;
+        mBigramSuggestionEnabled = sp.getBoolean(PREF_BIGRAM_SUGGESTIONS, true) & mShowSuggestions;
         updateCorrectionMode();
         updateAutoTextEnabled(mResources.getConfiguration().locale);
         mLanguageSwitcher.loadLocales(sp);
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 010913d6d..3e6090c72 100755
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -37,6 +37,21 @@ public class Suggest implements Dictionary.WordCallback {
     public static final int CORRECTION_NONE = 0;
     public static final int CORRECTION_BASIC = 1;
     public static final int CORRECTION_FULL = 2;
+    public static final int CORRECTION_FULL_BIGRAM = 3;
+
+    /**
+     * Words that appear in both bigram and unigram data get a multiplier ranging from
+     * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from
+     * bigram data.
+     */
+    public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
+    public static final double BIGRAM_MULTIPLIER_MAX = 1.5;
+
+    /**
+     * Maximum possible bigram frequency. Will depend on how many bits are being used in data
+     * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier.
+     */
+    public static final int MAXIMUM_BIGRAM_FREQUENCY = 127;
 
     static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
 
@@ -49,10 +64,13 @@ public class Suggest implements Dictionary.WordCallback {
     private Dictionary mContactsDictionary;
 
     private int mPrefMaxSuggestions = 12;
+    private int mPrefMaxBigrams = 255;
 
     private boolean mAutoTextEnabled;
 
     private int[] mPriorities = new int[mPrefMaxSuggestions];
+    private int[] mBigramPriorities = new int[mPrefMaxBigrams];
+
     // Handle predictive correction for only the first 1280 characters for performance reasons
     // If we support scripts that need latin characters beyond that, we should probably use some
     // kind of a sparse array or language specific list with a mapping lookup table.
@@ -60,6 +78,7 @@ public class Suggest implements Dictionary.WordCallback {
     // latin characters.
     private int[] mNextLettersFrequencies = new int[1280];
     private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
+    private ArrayList<CharSequence> mBigramSuggestions  = new ArrayList<CharSequence>();
     private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
     private boolean mHaveCorrection;
     private CharSequence mOriginalWord;
@@ -80,7 +99,7 @@ public class Suggest implements Dictionary.WordCallback {
 
     private void initPool() {
         for (int i = 0; i < mPrefMaxSuggestions; i++) {
-            StringBuilder sb = new StringBuilder(32);
+            StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
             mStringPool.add(sb);
         }
     }
@@ -132,9 +151,10 @@ public class Suggest implements Dictionary.WordCallback {
         }
         mPrefMaxSuggestions = maxSuggestions;
         mPriorities = new int[mPrefMaxSuggestions];
-        collectGarbage();
+        mBigramPriorities = new int[mPrefMaxBigrams];
+        collectGarbage(mSuggestions, mPrefMaxSuggestions);
         while (mStringPool.size() < mPrefMaxSuggestions) {
-            StringBuilder sb = new StringBuilder(32);
+            StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
             mStringPool.add(sb);
         }
     }
@@ -169,17 +189,16 @@ public class Suggest implements Dictionary.WordCallback {
     /**
      * Returns a list of words that match the list of character codes passed in.
      * This list will be overwritten the next time this function is called.
-     * @param a view for retrieving the context for AutoText
-     * @param codes the list of codes. Each list item contains an array of character codes
-     * in order of probability where the character at index 0 in the array has the highest 
-     * probability. 
+     * @param view a view for retrieving the context for AutoText
+     * @param wordComposer contains what is currently being typed
+     * @param prevWordForBigram previous word (used only for bigram)
      * @return list of suggestions.
      */
     public List<CharSequence> getSuggestions(View view, WordComposer wordComposer, 
-            boolean includeTypedWordIfValid) {
+            boolean includeTypedWordIfValid, CharSequence prevWordForBigram) {
         mHaveCorrection = false;
         mCapitalize = wordComposer.isCapitalized();
-        collectGarbage();
+        collectGarbage(mSuggestions, mPrefMaxSuggestions);
         Arrays.fill(mPriorities, 0);
         Arrays.fill(mNextLettersFrequencies, 0);
 
@@ -191,8 +210,39 @@ public class Suggest implements Dictionary.WordCallback {
         } else {
             mLowerOriginalWord = "";
         }
-        // Search the dictionary only if there are at least 2 characters
-        if (wordComposer.size() > 1) {
+
+        if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM
+                || mCorrectionMode == CORRECTION_BASIC)) {
+            // At first character, just get the bigrams
+            Arrays.fill(mBigramPriorities, 0);
+            collectGarbage(mBigramSuggestions, mPrefMaxBigrams);
+
+            if (!TextUtils.isEmpty(prevWordForBigram)) {
+                CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
+                if (mMainDict.isValidWord(lowerPrevWord)) {
+                    prevWordForBigram = lowerPrevWord;
+                }
+                mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
+                        mNextLettersFrequencies);
+                char currentChar = wordComposer.getTypedWord().charAt(0);
+                int count = 0;
+                int bigramSuggestionSize = mBigramSuggestions.size();
+                // Copy bigrams starting with the typed letter; stop at mPrefMaxSuggestions.
+                for (int i = 0; i < bigramSuggestionSize && count < mPrefMaxSuggestions; i++) {
+                    if (mBigramSuggestions.get(i).charAt(0) == currentChar) {
+                        int poolSize = mStringPool.size();
+                        StringBuilder sb = poolSize > 0 ?
+                                (StringBuilder) mStringPool.remove(poolSize - 1)
+                                : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+                        sb.setLength(0);
+                        sb.append(mBigramSuggestions.get(i));
+                        mSuggestions.add(count++, sb);
+                    }
+                }
+            }
+
+        } else if (wordComposer.size() > 1) {
+            // Search the dictionary only if there are at least 2 characters
             if (mUserDictionary != null || mContactsDictionary != null) {
                 if (mUserDictionary != null) {
                     mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
@@ -202,21 +252,26 @@ public class Suggest implements Dictionary.WordCallback {
                 }
 
                 if (mSuggestions.size() > 0 && isValidWord(mOriginalWord)
-                        && mCorrectionMode == CORRECTION_FULL) {
+                        && (mCorrectionMode == CORRECTION_FULL
+                        || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
                     mHaveCorrection = true;
                 }
             }
             mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
-            if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) {
+            if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM)
+                    && mSuggestions.size() > 0) {
                 mHaveCorrection = true;
             }
         }
+
         if (mOriginalWord != null) {
             mSuggestions.add(0, mOriginalWord.toString());
         }
-        
+
         // Check if the first suggestion has a minimum number of characters in common
-        if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) {
+        if (wordComposer.size() > 1 && mSuggestions.size() > 1
+                && (mCorrectionMode == CORRECTION_FULL
+                || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
             if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
                 mHaveCorrection = false;
             }
@@ -247,7 +302,6 @@ public class Suggest implements Dictionary.WordCallback {
                 i++;
             }
         }
-
         removeDupes();
         return mSuggestions;
     }
@@ -301,20 +355,50 @@ public class Suggest implements Dictionary.WordCallback {
         return false;
     }
 
-    public boolean addWord(final char[] word, final int offset, final int length, final int freq) {
+    public boolean addWord(final char[] word, final int offset, final int length, int freq,
+            final Dictionary.DataType dataType) {
+        ArrayList<CharSequence> suggestions;
+        int[] priorities;
+        int prefMaxSuggestions;
+        if(dataType == Dictionary.DataType.BIGRAM) {
+            suggestions = mBigramSuggestions;
+            priorities = mBigramPriorities;
+            prefMaxSuggestions = mPrefMaxBigrams;
+        } else {
+            suggestions = mSuggestions;
+            priorities = mPriorities;
+            prefMaxSuggestions = mPrefMaxSuggestions;
+        }
+
         int pos = 0;
-        final int[] priorities = mPriorities;
-        final int prefMaxSuggestions = mPrefMaxSuggestions;
+
         // Check if it's the same word, only caps are different
         if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
             pos = 0;
         } else {
+            if (dataType == Dictionary.DataType.UNIGRAM) {
+                // Check if the word was already added before (by bigram data)
+                int bigramSuggestion = searchBigramSuggestion(word,offset,length);
+                if(bigramSuggestion >= 0) {
+                    // turn freq from bigram into multiplier specified above
+                    double multiplier = (((double) mBigramPriorities[bigramSuggestion])
+                            / MAXIMUM_BIGRAM_FREQUENCY)
+                            * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
+                            + BIGRAM_MULTIPLIER_MIN;
+                    /* Log.d("Suggest","bigram num: " + bigramSuggestion
+                            + "  wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
+                            + "  currentPriority: " + freq + "  bigramPriority: "
+                            + mBigramPriorities[bigramSuggestion]
+                            + "  multiplier: " + multiplier); */
+                    freq = (int)Math.round((freq * multiplier));
+                }
+            }
+
             // Check the last one's priority and bail
             if (priorities[prefMaxSuggestions - 1] >= freq) return true;
             while (pos < prefMaxSuggestions) {
                 if (priorities[pos] < freq
-                        || (priorities[pos] == freq && length < mSuggestions
-                                .get(pos).length())) {
+                        || (priorities[pos] == freq && length < suggestions.get(pos).length())) {
                     break;
                 }
                 pos++;
@@ -324,12 +408,13 @@ public class Suggest implements Dictionary.WordCallback {
         if (pos >= prefMaxSuggestions) {
             return true;
         }
+
         System.arraycopy(priorities, pos, priorities, pos + 1,
                 prefMaxSuggestions - pos - 1);
         priorities[pos] = freq;
         int poolSize = mStringPool.size();
         StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) 
-                : new StringBuilder(32);
+                : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
         sb.setLength(0);
         if (mCapitalize) {
             sb.append(Character.toUpperCase(word[offset]));
@@ -339,9 +424,9 @@ public class Suggest implements Dictionary.WordCallback {
         } else {
             sb.append(word, offset, length);
         }
-        mSuggestions.add(pos, sb);
-        if (mSuggestions.size() > prefMaxSuggestions) {
-            CharSequence garbage = mSuggestions.remove(prefMaxSuggestions);
+        suggestions.add(pos, sb);
+        if (suggestions.size() > prefMaxSuggestions) {
+            CharSequence garbage = suggestions.remove(prefMaxSuggestions);
             if (garbage instanceof StringBuilder) {
                 mStringPool.add(garbage);
             }
@@ -349,6 +434,26 @@ public class Suggest implements Dictionary.WordCallback {
         return true;
     }
 
+    private int searchBigramSuggestion(final char[] word, final int offset, final int length) {
+        // TODO This is almost O(n^2). Might need fix.
+        // Returns the index of the first bigram suggestion whose characters exactly equal
+        // word[offset .. offset + length), or -1 when no suggestion matches.
+        final int candidateCount = mBigramSuggestions.size();
+        for (int index = 0; index < candidateCount; index++) {
+            final CharSequence candidate = mBigramSuggestions.get(index);
+            if (candidate.length() != length) {
+                continue;
+            }
+            int matched = 0;
+            while (matched < length && candidate.charAt(matched) == word[offset + matched]) {
+                matched++;
+            }
+            if (matched == length) return index;
+        }
+
+        return -1;
+    }
+
     public boolean isValidWord(final CharSequence word) {
         if (word == null || word.length() == 0) {
             return false;
@@ -359,21 +464,21 @@ public class Suggest implements Dictionary.WordCallback {
                 || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
     }
     
-    private void collectGarbage() {
+    private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
         int poolSize = mStringPool.size();
-        int garbageSize = mSuggestions.size();
-        while (poolSize < mPrefMaxSuggestions && garbageSize > 0) {
-            CharSequence garbage = mSuggestions.get(garbageSize - 1);
+        int garbageSize = suggestions.size();
+        while (poolSize < prefMaxSuggestions && garbageSize > 0) {
+            CharSequence garbage = suggestions.get(garbageSize - 1);
             if (garbage != null && garbage instanceof StringBuilder) {
                 mStringPool.add(garbage);
                 poolSize++;
             }
             garbageSize--;
         }
-        if (poolSize == mPrefMaxSuggestions + 1) {
+        if (poolSize == prefMaxSuggestions + 1) {
             Log.w("Suggest", "String pool got too big: " + poolSize);
         }
-        mSuggestions.clear();
+        suggestions.clear();
     }
 
     public void close() {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index bb45cb538..4fe80da69 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -59,8 +59,7 @@ static int latinime_BinaryDictionary_getSuggestions(
         jint maxAlternatives, jint skipPos, jintArray nextLettersArray, jint nextLettersSize)
 {
     Dictionary *dictionary = (Dictionary*) dict;
-    if (dictionary == NULL)
-        return 0;
+    if (dictionary == NULL) return 0;
 
     int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
     int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
@@ -81,6 +80,28 @@ static int latinime_BinaryDictionary_getSuggestions(
     return count;
 }
 
+static int latinime_BinaryDictionary_getBigrams
+        (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength,
+         jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams)
+{
+    // JNI bridge to Dictionary::getBigrams(); a NULL dictionary handle yields 0.
+    Dictionary *nativeDict = (Dictionary*) dict;
+    if (nativeDict == NULL) {
+        return 0;
+    }
+    jchar *prevWord = env->GetCharArrayElements(wordArray, NULL);
+    jchar *bigramChars = env->GetCharArrayElements(outputArray, NULL);
+    int *bigramFreqs = env->GetIntArrayElements(frequencyArray, NULL);
+    const int found = nativeDict->getBigrams((unsigned short*) prevWord, wordLength,
+            (unsigned short*) bigramChars, bigramFreqs, maxWordLength, maxBigrams);
+    // The input word is released with JNI_ABORT; both output arrays are released with mode 0.
+    env->ReleaseCharArrayElements(wordArray, prevWord, JNI_ABORT);
+    env->ReleaseCharArrayElements(outputArray, bigramChars, 0);
+    env->ReleaseIntArrayElements(frequencyArray, bigramFreqs, 0);
+    return found;
+}
+
+
 static jboolean latinime_BinaryDictionary_isValidWord
         (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength)
 {
@@ -108,7 +129,8 @@ static JNINativeMethod gMethods[] = {
                                           (void*)latinime_BinaryDictionary_open},
     {"closeNative",          "(I)V",            (void*)latinime_BinaryDictionary_close},
     {"getSuggestionsNative", "(I[II[C[IIIII[II)I",  (void*)latinime_BinaryDictionary_getSuggestions},
-    {"isValidWordNative",    "(I[CI)Z",         (void*)latinime_BinaryDictionary_isValidWord}
+    {"isValidWordNative",    "(I[CI)Z",         (void*)latinime_BinaryDictionary_isValidWord},
+    {"getBigramsNative",    "(I[CI[C[III)I",         (void*)latinime_BinaryDictionary_getBigrams}
 };
 
 static int registerNativeMethods(JNIEnv* env, const char* className,
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index e75beb5b7..a1a632faa 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -19,6 +19,7 @@
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <string.h>
+//#define LOG_TAG "dictionary.cpp"
 //#include <cutils/log.h>
 #define LOGI
 
@@ -27,6 +28,9 @@
 #include "char_utils.h"
 
 #define DEBUG_DICT 0
+#define DICTIONARY_VERSION_MIN 200  // lowest version byte accepted as the headered format
+#define DICTIONARY_HEADER_SIZE 2    // offset of the root node list when the header is present
+#define NOT_VALID_WORD (-99)        // isValidWordRec() result when the word is not found
 
 namespace latinime {
 
@@ -35,6 +39,7 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
     mDict = (unsigned char*) dict;
     mTypedLetterMultiplier = typedLetterMultiplier;
     mFullWordMultiplier = fullWordMultiplier;
+    getVersionNumber();
 }
 
 Dictionary::~Dictionary()
@@ -58,7 +63,11 @@ int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWor
     mNextLettersFrequencies = nextLetters;
     mNextLettersSize = nextLettersSize;
 
-    getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
+    if (checkIfDictVersionIsLatest()) {
+        getWordsRec(DICTIONARY_HEADER_SIZE, 0, mInputLength * 3, false, 1, 0, 0);
+    } else {
+        getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
+    }
 
     // Get the word count
     suggWords = 0;
@@ -85,6 +94,21 @@ Dictionary::registerNextLetter(unsigned short c)
     }
 }
 
+void
+Dictionary::getVersionNumber()
+{
+    mVersion = mDict[0];  // byte 0 of the data, interpreted as the format version
+    mBigram = mDict[1];   // byte 1 of the data, interpreted as the bigram flag
+    LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram);
+}
+
+// True when the cached header bytes match the new format (version >= minimum, bigram 0 or 1)
+bool
+Dictionary::checkIfDictVersionIsLatest()
+{
+    return (mBigram == 0 || mBigram == 1) && mVersion >= DICTIONARY_VERSION_MIN;
+}
+
 unsigned short
 Dictionary::getChar(int *pos)
 {
@@ -112,6 +136,28 @@ Dictionary::getAddress(int *pos)
     return address;
 }
 
+int
+Dictionary::getFreq(int *pos)
+{
+    // Reads the frequency byte at *pos and leaves *pos after this node's data.
+    const int frequency = mDict[(*pos)++];
+    if (!checkIfDictVersionIsLatest()) {
+        return frequency;  // old format: only the frequency byte is consumed
+    }
+    // New format: step over the bigram data that follows the frequency.
+    if ((mDict[*pos] & FLAG_BIGRAM_READ) == 0) {
+        (*pos)++;  // no bigram entries: a single byte is skipped
+        return frequency;
+    }
+    bool more;
+    do {
+        (*pos) += 3;  // three bytes precede each entry's continuation byte
+        more = (mDict[*pos] & FLAG_BIGRAM_CONTINUED) != 0;
+        (*pos)++;
+    } while (more);
+    return frequency;
+}
+
 int
 Dictionary::wideStrLen(unsigned short *str)
 {
@@ -161,6 +207,46 @@ Dictionary::addWord(unsigned short *word, int length, int frequency)
     return false;
 }
 
+bool
+Dictionary::addWordBigram(unsigned short *word, int length, int frequency)
+{
+    // Inserts word into the mBigramChars/mBigramFreq output arrays; returns false if dropped.
+    // Each output slot holds mMaxWordLength shorts including the terminator, so a longer
+    // word would overrun its slot (and word[length] below): reject it up front.
+    if (length < 0 || length >= mMaxWordLength) return false;
+    word[length] = 0;
+    if (DEBUG_DICT) {
+        char s[length + 1];
+        for (int i = 0; i <= length; i++) s[i] = word[i];
+        LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency);
+    }
+    // Find the right insertion point: higher frequency first, shorter word on a tie
+    int insertAt = 0;
+    while (insertAt < mMaxBigrams) {
+        if (frequency > mBigramFreq[insertAt]
+                 || (mBigramFreq[insertAt] == frequency
+                     && length < wideStrLen(mBigramChars + insertAt * mMaxWordLength))) {
+            break;
+        }
+        insertAt++;
+    }
+    if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
+    if (insertAt >= mMaxBigrams) return false;
+    // Shift the entries from insertAt onwards down one slot (the last one falls off).
+    memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
+           (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
+           (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0]));
+    mBigramFreq[insertAt] = frequency;
+    memmove((char*) mBigramChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
+           (char*) mBigramChars + (insertAt    ) * mMaxWordLength * sizeof(short),
+           (mMaxBigrams - insertAt - 1) * sizeof(short) * mMaxWordLength);
+    unsigned short *dest = mBigramChars + insertAt * mMaxWordLength;
+    while (length--) *dest++ = *word++;
+    *dest = 0; // NULL terminate
+    if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt);
+    return true;
+}
+
 unsigned short
 Dictionary::toLowerCase(unsigned short c) {
     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
@@ -213,12 +299,17 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
     }
 
     for (int i = 0; i < count; i++) {
+        // -- at char
         unsigned short c = getChar(&pos);
+        // -- at flag/address
         unsigned short lowerC = toLowerCase(c);
         bool terminal = getTerminal(&pos);
         int childrenAddress = getAddress(&pos);
+        // -- after address or flag
         int freq = 1;
         if (terminal) freq = getFreq(&pos);
+        // -- after address or freq
+
         // If we are only doing completions, no need to look at the typed characters.
         if (completion) {
             mWord[depth] = c;
@@ -232,7 +323,7 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
                 getWordsRec(childrenAddress, depth + 1, maxDepth,
                             completion, snr, inputIndex, diffs);
             }
-        } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
+        } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) {
             // Skip the ' or other letter and continue deeper
             mWord[depth] = c;
             if (childrenAddress != 0) {
@@ -270,14 +361,185 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
     }
 }
 
-bool
-Dictionary::isValidWord(unsigned short *word, int length)
+int
+Dictionary::getBigramAddress(int *pos, bool advance)
 {
-    return isValidWordRec(0, word, 0, length);
+    // Decodes a 3-byte, most-significant-byte-first address at *pos. The top two
+    // bits of the first byte are flag bits and are masked off (0x3F).
+    const int high = (mDict[*pos] & 0x3F) << 16;
+    const int mid = mDict[*pos + 1] << 8;
+    const int low = mDict[*pos + 2];
+    // Callers that only want to peek at the address pass advance == false.
+    if (advance) {
+        *pos += 3;
+    }
+
+    return high + mid + low;
+}
+
+int
+Dictionary::getBigramFreq(int *pos)
+{
+    // The low 7 bits (FLAG_BIGRAM_FREQ) of the byte at *pos are the frequency;
+    // the top bit is masked off. The read advances *pos by one byte.
+    return mDict[(*pos)++] & FLAG_BIGRAM_FREQ;
+}
+
+
+int
+Dictionary::getBigrams(unsigned short *prevWord, int prevWordLength, unsigned short *bigramChars,
+                       int *bigramFreq, int maxWordLength, int maxBigrams)
+{
+    // Collects the bigram followers of prevWord into the caller's output arrays and
+    // returns how many bigram entries were read (0 when the dictionary carries no
+    // bigrams, prevWord is not found, or prevWord has no bigram data).
+    mBigramChars = bigramChars;
+    mBigramFreq = bigramFreq;
+    mMaxBigrams = maxBigrams;
+    mMaxWordLength = maxWordLength;
+
+    if (mBigram != 1 || !checkIfDictVersionIsLatest()) {
+        return 0;
+    }
+    int cursor = isValidWordRec(DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
+    LOGI("Pos -> %d\n", cursor);
+    if (cursor < 0 || (mDict[cursor] & FLAG_BIGRAM_READ) == 0) {
+        return 0;
+    }
+
+    int entriesRead = 0;
+    bool more;
+    do {
+        const int wordAddress = getBigramAddress(&cursor, true);
+        // After the address, one byte carries the frequency plus the "continued" bit.
+        const int flagsAndFreq = mDict[cursor++];
+        // Resolve the address back to its word and store it with this frequency.
+        searchForTerminalNode(wordAddress, flagsAndFreq & FLAG_BIGRAM_FREQ);
+        more = (flagsAndFreq & FLAG_BIGRAM_CONTINUED) != 0;
+        entriesRead++;
+    } while (more);
+    return entriesRead;
+}
+
+void
+Dictionary::searchForTerminalNode(int addressLookingFor, int frequency)
+{
+    // Walks down from the root to the terminal node whose character byte sits at
+    // addressLookingFor, rebuilding its word one level at a time for addWordBigram().
+    unsigned short word[mMaxWordLength];
+    int pos;
+    int followDownBranchAddress = DICTIONARY_HEADER_SIZE;
+    bool found = false;
+    unsigned char followingChar = ' '; // unsigned: bytes >= 0x80 must not sign-extend
+    int depth = -1;
+    // Stop before word[] can overflow: a pass may still need two chars plus the terminator.
+    while (!found && depth + 2 < mMaxWordLength) {
+        bool followDownAddressSearchStop = false;
+        bool firstAddress = true;
+        bool haveToSearchAll = true;
+
+        if (depth >= 0) {
+            word[depth] = (unsigned short) followingChar;
+        }
+        pos = followDownBranchAddress; // pos start at count
+        int count = mDict[pos] & 0xFF;
+        LOGI("count - %d\n",count);
+        pos++;
+        for (int i = 0; i < count; i++) {
+            // pos at data
+            pos++;
+            // pos now at flag
+            if (!getFirstBitOfByte(&pos)) { // non-terminal
+                if (!followDownAddressSearchStop) {
+                    int addr = getBigramAddress(&pos, false);
+                    if (addr > addressLookingFor) {
+                        followDownAddressSearchStop = true;
+                        if (firstAddress) {
+                            firstAddress = false;
+                            haveToSearchAll = true;
+                        } else if (!haveToSearchAll) {
+                            break;
+                        }
+                    } else {
+                        followDownBranchAddress = addr;
+                        followingChar = (unsigned char)(0xFF & mDict[pos-1]);
+                        if (firstAddress) {
+                            firstAddress = false;
+                            haveToSearchAll = false;
+                        }
+                    }
+                }
+                pos += 3;
+            } else { // terminal
+                if (addressLookingFor == (pos-1)) { // found !!
+                    depth++;
+                    word[depth] = (0xFF & mDict[pos-1]);
+                    found = true;
+                    break;
+                }
+                if (getSecondBitOfByte(&pos)) { // address + freq (4 byte)
+                    if (!followDownAddressSearchStop) {
+                        int addr = getBigramAddress(&pos, false);
+                        if (addr > addressLookingFor) {
+                            followDownAddressSearchStop = true;
+                            if (firstAddress) {
+                                firstAddress = false;
+                                haveToSearchAll = true;
+                            } else if (!haveToSearchAll) {
+                                break;
+                            }
+                        } else {
+                            followDownBranchAddress = addr;
+                            followingChar = (unsigned char)(0xFF & mDict[pos-1]);
+                            if (firstAddress) {
+                                firstAddress = false;
+                                haveToSearchAll = true; // NOTE(review): branch above uses false
+                            }
+                        }
+                    }
+                    pos += 4;
+                } else { // freq only (2 byte)
+                    pos += 2;
+                }
+
+                // skipping bigram
+                int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ);
+                if (bigramExist > 0) {
+                    int nextBigramExist = 1;
+                    while (nextBigramExist > 0) {
+                        pos += 3;
+                        nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED);
+                    }
+                } else {
+                    pos++;
+                }
+            }
+        }
+        depth++;
+        if (followDownBranchAddress == 0) {
+            LOGI("ERROR!!! Cannot find bigram!!");
+            break;
+        }
+    }
+    // Only a located word is stored; a failed or cut-off walk leaves word[] incomplete.
+    if (found) addWordBigram(word, depth, frequency);
+}
 
 bool
+Dictionary::isValidWord(unsigned short *word, int length)
+{
+    // With the new format the lookup starts at DICTIONARY_HEADER_SIZE instead of 0;
+    // the word is valid unless the recursive search reports NOT_VALID_WORD.
+    const int rootPos = checkIfDictVersionIsLatest() ? DICTIONARY_HEADER_SIZE : 0;
+    const int result = isValidWordRec(rootPos, word, 0, length);
+    return result != NOT_VALID_WORD;
+}
+
+int
 Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+    // Returns the address of the bigram data of that word,
+    // or NOT_VALID_WORD (-99) if the word is not found.
+
     int count = getCount(&pos);
     unsigned short currentChar = (unsigned short) word[offset];
     for (int j = 0; j < count; j++) {
@@ -287,12 +549,13 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length
         if (c == currentChar) {
             if (offset == length - 1) {
                 if (terminal) {
-                    return true;
+                    return (pos+1);
                 }
             } else {
                 if (childPos != 0) {
-                    if (isValidWordRec(childPos, word, offset + 1, length)) {
-                        return true;
+                    int t = isValidWordRec(childPos, word, offset + 1, length);
+                    if (t > 0) {
+                        return t;
                     }
                 }
             }
@@ -303,7 +566,7 @@ Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length
         // There could be two instances of each alphabet - upper and lower case. So continue
         // looking ...
     }
-    return false;
+    return NOT_VALID_WORD;
 }
 
 
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 3749f3d88..2c574290f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -28,12 +28,19 @@ namespace latinime {
 // if the word has other endings.
 #define FLAG_TERMINAL_MASK 0x80
 
+#define FLAG_BIGRAM_READ 0x80        // byte after a frequency: set when bigram data follows
+#define FLAG_BIGRAM_CHILDEXIST 0x40  // NOTE(review): not referenced by the code in this change
+#define FLAG_BIGRAM_CONTINUED 0x80   // bigram frequency byte: set when another entry follows
+#define FLAG_BIGRAM_FREQ 0x7F        // mask for the frequency bits of that same byte
+
 class Dictionary {
 public:
     Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
     int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
             int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
             int *nextLetters, int nextLettersSize);
+    int getBigrams(unsigned short *word, int length, unsigned short *outWords, int *frequencies,
+            int maxWordLength, int maxBigrams);
     bool isValidWord(unsigned short *word, int length);
     void setAsset(void *asset) { mAsset = asset; }
     void *getAsset() { return mAsset; }
@@ -41,28 +48,40 @@ public:
 
 private:
 
+    void getVersionNumber();
+    bool checkIfDictVersionIsLatest();
     int getAddress(int *pos);
+    int getBigramAddress(int *pos, bool advance);
+    int getFreq(int *pos);
+    int getBigramFreq(int *pos);
+    void searchForTerminalNode(int address, int frequency);
+
+    bool getFirstBitOfByte(int *pos) { return (mDict[*pos] & 0x80) > 0; }
+    bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; }
     bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
-    int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
     int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
     unsigned short getChar(int *pos);
     int wideStrLen(unsigned short *str);
 
     bool sameAsTyped(unsigned short *word, int length);
     bool addWord(unsigned short *word, int length, int frequency);
+    bool addWordBigram(unsigned short *word, int length, int frequency);
     unsigned short toLowerCase(unsigned short c);
     void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
             int inputIndex, int diffs);
-    bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
+    int isValidWordRec(int pos, unsigned short *word, int offset, int length);
     void registerNextLetter(unsigned short c);
 
     unsigned char *mDict;
     void *mAsset;
 
     int *mFrequencies;
+    int *mBigramFreq;
     int mMaxWords;
+    int mMaxBigrams;
     int mMaxWordLength;
     unsigned short *mOutputChars;
+    unsigned short *mBigramChars;
     int *mInputCodes;
     int mInputLength;
     int mMaxAlternatives;
@@ -74,6 +93,8 @@ private:
     int mTypedLetterMultiplier;
     int *mNextLettersFrequencies;
     int mNextLettersSize;
+    int mVersion;
+    int mBigram;
 };
 
 // ----------------------------------------------------------------------------
diff --git a/tests/data/bigramlist.xml b/tests/data/bigramlist.xml
new file mode 100644
index 000000000..dd3f2916e
--- /dev/null
+++ b/tests/data/bigramlist.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+/*
+**
+** Copyright 2010, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+-->
+
+<bigrams>
+    <bi w1="I'm" count="1">
+        <w w2="about" p="100" />
+    </bi>
+    <bi w1="about" count="3">
+        <w w2="part" p="117" />
+        <w w2="business" p="100" />
+        <w w2="being" p="10" />
+    </bi>
+    <bi w1="business" count="1">
+        <w w2="people" p="100" />
+    </bi>
+    <bi w1="from" count="1">
+        <w w2="same" p="117" />
+    </bi>
+</bigrams>
diff --git a/tests/data/wordlist.xml b/tests/data/wordlist.xml
index 22d0caa38..b870eb2a3 100644
--- a/tests/data/wordlist.xml
+++ b/tests/data/wordlist.xml
@@ -225,6 +225,7 @@
  <w f="179">services</w>
  <w f="170">niño</w>
  <w f="170">María</w>
+ <w f="70">car</w>
  <w f="0">hmmm</w>
  <w f="0">hon</w>
  <w f="0">tty</w>
diff --git a/tests/res/raw/test.dict b/tests/res/raw/test.dict
index e789aaa9a66cd003e1282870a79fc12a8d731fdd..6a5d6d794f4a1f5c21f25ff97bfd89fe5a0f80f5 100644
GIT binary patch
literal 2829
zcmXYzdyG}p6~^~I?{iR^*d}UQYeO|tt3_<9Z7rk=+hQvv(o{*51R3U@nYr9M_mcaV
zVVcmM65CWv)%b{kno?5Q6q;Jw;W@+O$U_DY5D1Tok5NI?f(nX_)IfjhPX9SMd+)Wr
zwZ65!z0Mi7pKXXKwTpeMD$ciy;tX_(dr6<Tv--uGGDp0%KNY{&E&jr~_@7iH9NQyd
z|Bobmd#*%LQ=-c|B)YF4(fehIJ8BYd@0Iw>JW0lONzzx6WLql9!D*6w9CnLYRL$+J
zXHB2jX+^OqVXF<X3+Ib{$WL>#?4oHGv$H7XR86d36+15I&kmTHm_ar-_hdUvCEIH1
zVz${d+hc4k+hus$Vw^^94!>zCxf$7FY@3*4ww`S^wv?Nbn+)&kjZ@EFH5D=EoDwHp
zxixF_V2yE7v11ALX^?PfioL>iiM`5pXB+Vndp%ndINWG-|7P}A8>b-ld(~{nSXlhf
z_N!@?{J@%FD`IC_n9XK?z}OzKb8&ypI3=<3%VHO>8YDSzY=3SaFvgZM9t|FGCAWk7
z;B1f8vqJ{!4;tBiGe;cTDbDA*#Q91Oer_TA$n<C14T<m2SkK#TvHLCY4^tKrOtT|;
zcw7m+ttq{03)x!Zl(U0W(0*d8%9geVSU6({zBfw>=Q8hu<C#Ax0n_;+aL%>BJ*zaG
z8!ZC)hOK0|i8^y<);a3!G)7D<J7$1b*elM1cs)@PXL(tir!7V)Jn@=qJH)9%dz~h>
z9a_!;it{Xu%XR}WOdG{{gizZJM4TnMT*`7=EAk3#*BkO*WjtaIpEQl!xiDhtMD9c(
z4q^9Gp2LvZ+9Gw#*2Ed1^v(%Q{T-$Ao@Vl)QvMhcD56c=unrq!u+O+vaj&e2+g22J
zl2gl0z{wp)3WIc3+zF|;-?BZ~i&?Ot*&^a%d`GXicU8o_$JWKYlNwT~bTrk~#s{g{
zF7D!PasQs??th=7A>QXn*8O=|+{bNq?*3{-({&fo1NUM4*Qylu8c&(^6B_K5-J{wV
z1>{1xM*uxh&30ljV4Mzd_W*YQT^`WrYm8ILo<~9M2rCk5<nF26#$tDd+PWXvb^yuV
zi^}3%+9uv**kMrC+Od5(%k{=>igz8WueXag*<v}K`tL%+*#_F@O>A|8n8t8Zy_u<a
zO~oyW_eZ?`X`8t>=M_UC_ZdQz6nGxMYmL{Qd%rQJfk@T6F5cs|mU|CFy^cPpq_>z(
zdB3OjT#Ax-e?bl2-#BHd55GV6UQo?>htk|Tf0VJ|-(3~|z7{@``j?`<q9AXeeD5vw
zKdr!L@n4%IHx~R<{7-d=|Cvtlzg!gmn+@?Nqlz7>H~&i86#ol2_+J9Dr6~WaFz~;|
z61`N!pJ==E_V<ZD1$2L^KBl*1Wr4ky%dM3czd(t&=HOM-t!@kA&(|2L|D(oFbARy>
zCGa#^<d8VfBmP;Zmix<xsk6U=hzz-TwAJkuf2hFRtLyAbnGSB9%iP1A=@Nerfd-7%
z$CUx$4FbMZ(So%ShRJE(s4i#!LB_^J)jYWRu<4fI`>6!8+a*}hlpt?Nu&XG+$$kmW
zV;o##(>%DI`^mus<P+RXS%X{Q5`32uAU2(BRSBj!xX<X*qLd|=i99yJj%+|~)+D&U
zSAwMtorf%bW;-PK1sRhO`wuAQL%R2vhFj8_%vh4xa%wtYT+Y|%eITPDplL~}WyNDV
zC=6c=4if2Q4Sp0Uqb4$b9W!Pg$-Y1T|MutDQhW(6Vi9@-31cd<3LWR+wYwSN;rOP6
zKWUdRElOBLPlrt(Bg*Z|!$~{Ld<k#smGE|?w2B}a36~$+nsVdwIK$47aHj6fW^Y4l
zPv<{D&q&y?)oi5+^t)D-@X-`p0{w}AI|%loQ<Ly_bqW7~16Q(vzo%oql85Uyn1X~G
zOA>Aw9fzsbS|5yn@CBR>Q3~d5Lql~*_$uuRkF%suZ4$og_Aug|O9~`^p)8S$Z)@CO
z&6oUAN)la$^={+M%cC21nR*`Gk~4v#+gcZ~D$z|eL+R$xBpO$MFyFywYD1#C;Tuhd
zFJrnW(Y<tluC+^4#%&opZ+aDUOxOj4d8(F8K_+>0Kf{&7-{Slvl8PQhQhWh)(39ON
z9YWDFJ$W>!lQ23!bEDOq=Bm1NW}ZY_Eis1Gbq_^>D7hTby_YP1Ca*z<rm**KP6xEZ
zaX8G3if?F__@-3iTbdHz>FM0=llY2?#8<J|22K7N+9Vza0H1yRO&~09{o2rm#NQ!C
zEKrSqKvMA?qhjC(=q}y6hlJu;aN$C-Y+$>jD)Dl#bGIb%FG2Wqr^HV{l8M9WK}C6l
zCG~5yr7rQ(R&W=nf8e}^KdT`i$)q$|9%Tk)3K;JN`DJ<@AEf8;QTDkZM15UhPwL|o
zsq1Bx_<T_k&ywmzw)7%N65Bq2Jl?J@OTxrY7^g`$U#8?5%ir8}ASB~SY^x$96ET<y
zd@>o$B+NgZi5-$m=Z$>$^_x|bq=OY~S{H<nJ=7$rv7{!|JXy4ni<bPEgTG?Cny+v2
zB#Wn7;w|W}3Fgb-OCuV(?3Lsh5LNP$3~Kbv93Xe>c50@3A+gCg*uBKzdusL?U&0~2
fgvmQlMY!bnp>54$EH2oZjB)L0858kETxtFXLKju=

literal 2562
zcmX9=eT-aH6@Ta6`@WK5QWI*@CRRl)h$v`5lp0Slp;|-`geW37otfR4&dhtwo7tUR
z4dxmRNR1G+Adr-r7O6BMmD!ou+0WZ<w>#{X)^4*~x@g<5DNB$FZD>P5sK2}ZF}wGk
zd(Q9t&c{9XEw8YXPcbDi)5ScCVh4-iDoeMQ<?IGlH{8MMc#hTiGP|jWy?aaSjeeW`
z%QmvVri=acY4$f~I9O8TpgZB<AKe_hw~50`J2~tra5$3X@W3q`zUAlGc`!C~+1V=7
z>O9kWc)g>-l<8rL+^li7UQDqwnrG)<2|H)O=Mt}1Ph(w!tXa|%ksd3vGe}we5+!;P
z|M!ZDaVB4jjB{i!tzzd8mGvkUjB{)RtClG16A?S7L_v>s8s|R^2pb~H&igSt7hyk2
zT}+BPnL^6xVNqsU2FY?94O{6-Nb4fQbY-ju352hvUZ}--BFnUfBBr&35N?9BMkS_O
zVYpuunC>Yu{REo<D(E`(8rtYkQ9D%8P8b<_^iaZ7E9)7ky;#wQH!#g~FwMi|nUb!{
zj6UD1$B}|RS@=Iwj%gRc<@1QUF0%S4js`7vV>ygdMmI%K9|8Tt6eB7e&)MS>aCp!{
zyl$;e!+MX6;yfY$?^C2r&|$<U&qYRDdot0lbg@|5&Ei`X7T+tdxUa|}N6=U`00Z%P
zO0&2ILG^>dIL^L~u=j|Ho=S8Z_~I7ekAthY%kF-J-9C5|8K|{Du}{Iz{R<T%{(iEo
zU+K`(0*)R)!)*rXF_0FIP{LvhGAW+0mQR7Y*kOJD#hyL~#x1dmMXRh0N_j{^I0=hc
zXe@|CpF#lBa62GyZ~`3WsifyDV+pNy5ZVV3OQJH13&_kKoLFLYH_viKishOd%kO55
z{Qdrz<!-Q(pDwbzo^nQh?j#t<FCwXOHB6hR=0Sxhdi4Mx*XI^%y9kuZvP=N>^H{%#
zwh(ES8-Ws`m8RjaAtHSsVfj<6_fW;iUriBI9zx#a!<5nvsLE|H*s+S`PAE;m)fn<3
z|B6jZg1|5~`{7}lptctQOrpVuus&$B3_p$wu5V;nS-d=}D-u>;p)MoeK8`FM0`R*Q
z@B;ugRm!NPPDR&YR%W$=5~DuWM0Wd;CiMv<UwsBCb*RAV3#hC5GR_?Pb9;@t9X+G&
z${N-Ea*EY~nAMC0MN=Gu^}W_QjTTioYgw|02~3+VcE3rpdZdHZcIfP`uxg>Y6R5h{
zLcn~&>OKW5+ZSpl=qek(?RQnDPTa;Ss}YMjX8*KpYY-3*<H8}Wtmcr1x<#6|Ca+V5
z)oF`<7Tx$V#*Mq=Xo}sB=h?j~W_L}A-5-d=xXao_cJ2M2q)oaZyV<=WqhBgx;$-!V
zEbAjBJ>JPKdP3J!Rqun{&0Xx?8X+f^9>(PNvFJw+yFW(vIYDgPU(V*(eJ0E9Xo@}2
z#on?Cdn@zoeXW<ho2}}v35*_YgSh(#bb|XhGUsjub9Xy}#mv%ZN%wECbsJR#50Zl@
z_K2I+`w#}Eq<a!vFH|sxus%v@cK->E{RHVL3-*Q;y=C2;vtzJ^thpZ|Yc&ZTA3+;q
zuzwVgNGI}^W^V<|hXoEV2kC3z(Yp$r*aSuII_NY7UV|HZ{;#_Z0-&;Yn?1f0*xua;
zzR!(~SDC`@X9)U9;5-Ms2D)Rms@t+^yoV-w*!yiad%pvxKC%}J+K*API8VK8w1K^+
zt>{^ZhL9=mg+*6q*c+r+@3JpM%-(EPw}HNZ2TA4Gzf>gbodBJAC{F=(NMW2t#(Q@t
z!`^=j>^qAyDT_?vhb!t|Qf7ZCA|8Nt1Eb$xjxzWwv8&@@@h_JpeXyJT)kXHNhmF1e
z4Rn{k7E${*L2XKHGX6c2W#eZ}#C~kAOw4`?DMn1jFZ3h1HM}K$Plf$oASVBQ#MDA<
z{0D$mlPUK92#c{i2U13Q60UXte3XjDe|7+s@Lzz@AezE&qABdu?*o-Sj0FF1$@p`a
zME=_-ls}6jgP!T%#Qv)U+y!g&CStCmKK?l?dXEtGMG&t+6a+Y{p<2PG+Hn8P6bI|F
z9H6s<w8EgQa`2^ygX<vs1_ByEN`f0;JB+F|;A|M?Ll);YKnHgsrr>T63GQ30Dn1Kx
zR#XCuAOZCniWEEu&0R4EHGm$F1rD~r{_zeDo<UF(7Hq49*p6KbKa3;}p0_?FPz((B
z;Zvgxv<9xcO#}kL;|SzqKwLl#gX5@SFc0yVrS+zTJ7fQx2VMIe5Tf_PC6sVz<F|t(
zTuN14S6F<u$l+I09R4uoa3d-iUP<_@xf<r-wQx3O&2PlHjW}Efa^ZI%Hk9TN{SPAe
z0NMcL4s2SW&_o8p7#RpF*ws{Q!p+0L*a7iQ=<UX17Cwf}lePgfc*LQ)4dIPIeH7O?
zwDS-B%3<BQw2eAob?kr)+n_xnpfrP{*R0YU-kCw*zKY12mes$>4n!QkLl9q}(;R+?
I_hHxn0FARwX8-^I

diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
index 9401d926a..59720640a 100644
--- a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
+++ b/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java
@@ -71,7 +71,7 @@ public class SuggestTests extends AndroidTestCase {
             Log.w(TAG, "No available size for binary dictionary");
         }
         mSuggest.setAutoTextEnabled(false);
-        mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL);
+        mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
     }
 
     /************************** Helper functions ************************/
@@ -108,19 +108,56 @@ public class SuggestTests extends AndroidTestCase {
 
     private boolean isDefaultSuggestion(CharSequence typed, CharSequence expected) {
         WordComposer word = createWordComposer(typed);
-        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false);
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
+        return isDefaultSuggestion(suggestions, expected);
+    }
+
+    private void getBigramSuggestions(CharSequence previous, CharSequence typed) {
+        // Requests suggestions for just the first typed character (multi-char input only).
+        if (TextUtils.isEmpty(previous) || typed.length() <= 1) return;
+        final String head = String.valueOf(typed.charAt(0));
+        mSuggest.getSuggestions(null, createWordComposer(head), false, previous);
+    }
+
+    private boolean isDefaultNextSuggestion(CharSequence previous, CharSequence typed,
+            CharSequence expected) {
+        WordComposer word = createWordComposer(typed);
+        getBigramSuggestions(previous, typed);
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
         return isDefaultSuggestion(suggestions, expected);
     }
 
     private boolean isDefaultCorrection(CharSequence typed, CharSequence expected) {
         WordComposer word = createWordComposer(typed);
-        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false);
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
+        return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
+    }
+
+    private boolean isDefaultNextCorrection(CharSequence previous, CharSequence typed,
+            CharSequence expected) {
+        final WordComposer input = createWordComposer(typed);
+        getBigramSuggestions(previous, typed);  // first-character request, when it applies
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, input, false, previous);
+        for (int rank = 0; rank < suggestions.size(); rank++) {
+            Log.i(TAG, rank + " " + suggestions.get(rank));  // dump the ranking to the log
+        }
         return isDefaultSuggestion(suggestions, expected) && mSuggest.hasMinimalCorrection();
     }
 
     private boolean isASuggestion(CharSequence typed, CharSequence expected) {
         WordComposer word = createWordComposer(typed);
-        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false);
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, null);
+        for (int i = 1; i < suggestions.size(); i++) {
+            if (TextUtils.equals(suggestions.get(i), expected)) return true;
+        }
+        return false;
+    }
+
+    private boolean isASuggestion(CharSequence previous, CharSequence typed,
+            CharSequence expected) {
+        WordComposer word = createWordComposer(typed);
+        getBigramSuggestions(previous, typed);
+        List<CharSequence> suggestions = mSuggest.getSuggestions(null, word, false, previous);
         for (int i = 1; i < suggestions.size(); i++) {
             if (TextUtils.equals(suggestions.get(i), expected)) return true;
         }
@@ -241,8 +278,30 @@ public class SuggestTests extends AndroidTestCase {
      * Are accented forms of words suggested as corrections?
      */
     public void testAccents() {
-        assertTrue(isDefaultCorrection("nino", "ni\u00F1o")); // ni�o
-        assertTrue(isDefaultCorrection("nimo", "ni\u00F1o")); // ni�o
-        assertTrue(isDefaultCorrection("maria", "Mar\u00EDa")); // Mar�a
+        assertTrue(isDefaultCorrection("nino", "ni\u00F1o")); // niño
+        assertTrue(isDefaultCorrection("nimo", "ni\u00F1o")); // niño
+        assertTrue(isDefaultCorrection("maria", "Mar\u00EDa")); // María
+    }
+
+    /**
+     * Makes sure bigrams of the previous word are offered as soon as the first character
+     * is typed, and that a bigram not matching that character is not the default.
+     */
+    public void testBigramsAtFirstChar() {
+        assertTrue(isDefaultNextCorrection("about", "p", "part"));
+        assertTrue(isDefaultNextCorrection("I'm", "a", "about"));
+        assertTrue(isDefaultNextCorrection("about", "b", "business"));
+        assertTrue(isASuggestion("about", "b", "being"));
+        assertFalse(isDefaultNextSuggestion("about", "p", "business"));
+    }
+
+    /**
+     * Makes sure the bigram score changes which word is the default correction.
+     */
+    public void testBigramsScoreEffect() {
+        assertTrue(isDefaultCorrection("pa", "page"));
+        assertTrue(isDefaultNextCorrection("about", "pa", "part"));
+        assertTrue(isDefaultCorrection("sa", "said"));
+        assertTrue(isDefaultNextCorrection("from", "sa", "same"));
     }
 }