diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 4bb64ee90..9f934c6ef 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -1,12 +1,12 @@ /* * Copyright (C) 2008 The Android Open Source Project - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -17,13 +17,9 @@ package com.android.inputmethod.latin; import android.content.Context; +import android.content.res.AssetFileDescriptor; import android.util.Log; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.Channels; import java.util.Arrays; /** @@ -48,15 +44,12 @@ public class BinaryDictionary extends Dictionary { private int mDicTypeId; private int mNativeDict; - private int mDictLength; + private long mDictLength; private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES]; private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; private final int[] mFrequencies = new int[MAX_WORDS]; private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS]; - // Keep a reference to the native dict direct buffer in Java to avoid - // unexpected deallocation of the direct buffer. - private ByteBuffer mNativeDictDirectBuffer; static { try { @@ -78,30 +71,9 @@ public class BinaryDictionary extends Dictionary { mDicTypeId = dicTypeId; } - /** - * Create a dictionary from a byte buffer. This is used for testing. - * @param context application context for reading resources - * @param byteBuffer a ByteBuffer containing the binary dictionary - */ - public BinaryDictionary(Context context, ByteBuffer byteBuffer, int dicTypeId) { - if (byteBuffer != null) { - if (byteBuffer.isDirect()) { - mNativeDictDirectBuffer = byteBuffer; - } else { - mNativeDictDirectBuffer = ByteBuffer.allocateDirect(byteBuffer.capacity()); - byteBuffer.rewind(); - mNativeDictDirectBuffer.put(byteBuffer); - } - mDictLength = byteBuffer.capacity(); - mNativeDict = openNative(mNativeDictDirectBuffer, - TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER, - MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES); - } - mDicTypeId = dicTypeId; - } - - private native int openNative(ByteBuffer bb, int typedLetterMultiplier, - int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives); + private native int openNative(String apkFileName, long dictOffset, long dictSize, + int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, + int maxWords, int maxAlternatives); private native void closeNative(int dict); private native boolean isValidWordNative(int nativeData, char[] word, int wordLength); private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize, @@ -112,37 +84,17 @@ public class BinaryDictionary extends Dictionary { int maxWordLength, int maxBigrams, int maxAlternatives); private final void loadDictionary(Context context, int resId) { - InputStream is = null; - try { - is = context.getResources().openRawResource(resId); - final int total = is.available(); - mNativeDictDirectBuffer = - ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder()); - final int got = Channels.newChannel(is).read(mNativeDictDirectBuffer); - if (got != total) { - Log.e(TAG, "Read " + got + " bytes, expected " + total); - } else { - mNativeDict = openNative(mNativeDictDirectBuffer, - TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER, - MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES); - mDictLength = total; - } - } catch (IOException e) { - Log.w(TAG, "No available memory for binary dictionary"); - } finally { - try { - if (is != null) is.close(); - } catch (IOException e) { - Log.w(TAG, "Failed to close input stream"); - } - } + final AssetFileDescriptor afd = context.getResources().openRawResourceFd(resId); + mNativeDict = openNative(context.getApplicationInfo().sourceDir, + afd.getStartOffset(), afd.getLength(), + TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER, + MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES); + mDictLength = afd.getLength(); } - @Override public void getBigrams(final WordComposer codes, final CharSequence previousWord, final WordCallback callback, int[] nextLettersFrequencies) { - char[] chars = previousWord.toString().toCharArray(); Arrays.fill(mOutputChars_bigrams, (char) 0); Arrays.fill(mFrequencies_bigrams, 0); @@ -212,7 +164,7 @@ public class BinaryDictionary extends Dictionary { return isValidWordNative(mNativeDict, chars, chars.length); } - public int getSize() { + public long getSize() { return mDictLength; // This value is initialized on the call to openNative() } @@ -221,6 +173,7 @@ public class BinaryDictionary extends Dictionary { if (mNativeDict != 0) { closeNative(mNativeDict); mNativeDict = 0; + mDictLength = 0; } } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index be98f4c3e..ad783cac0 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -371,23 +371,19 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen int mainDicResId = getMainDictionaryResourceId(res); mSuggest = new Suggest(this, mainDicResId); loadAndSetAutoCorrectionThreshold(prefs); - if (mUserDictionary != null) mUserDictionary.close(); + mUserDictionary = new UserDictionary(this, locale); - if (mContactsDictionary == null) { - mContactsDictionary = new ContactsDictionary(this, Suggest.DIC_CONTACTS); - } - if (mAutoDictionary != null) { - mAutoDictionary.close(); - } + mSuggest.setUserDictionary(mUserDictionary); + + mContactsDictionary = new ContactsDictionary(this, Suggest.DIC_CONTACTS); + mSuggest.setContactsDictionary(mContactsDictionary); + mAutoDictionary = new AutoDictionary(this, this, locale, Suggest.DIC_AUTO); - if (mUserBigramDictionary != null) { - mUserBigramDictionary.close(); - } + mSuggest.setAutoDictionary(mAutoDictionary); + mUserBigramDictionary = new UserBigramDictionary(this, this, locale, Suggest.DIC_USER); mSuggest.setUserBigramDictionary(mUserBigramDictionary); - mSuggest.setUserDictionary(mUserDictionary); - mSuggest.setContactsDictionary(mContactsDictionary); - mSuggest.setAutoDictionary(mAutoDictionary); + updateCorrectionMode(); mWordSeparators = res.getString(R.string.word_separators); mSentenceSeparators = res.getString(R.string.sentence_separators); @@ -397,11 +393,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen @Override public void onDestroy() { - if (mUserDictionary != null) { - mUserDictionary.close(); - } - if (mContactsDictionary != null) { - mContactsDictionary.close(); + if (mSuggest != null) { + mSuggest.close(); + mSuggest = null; } unregisterReceiver(mReceiver); mVoiceConnector.destroy(); diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 9f979fffd..9ea9c2f3e 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -1,12 +1,12 @@ /* * Copyright (C) 2008 The Android Open Source Project - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -22,12 +22,11 @@ import android.text.TextUtils; import android.util.Log; import android.view.View; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; /** - * This class loads a dictionary and provides a list of suggestions for a given sequence of + * This class loads a dictionary and provides a list of suggestions for a given sequence of * characters. This includes corrections and completions. */ public class Suggest implements Dictionary.WordCallback { @@ -108,11 +107,6 @@ public class Suggest implements Dictionary.WordCallback { initPool(); } - public Suggest(Context context, ByteBuffer byteBuffer) { - mMainDict = new BinaryDictionary(context, byteBuffer, DIC_MAIN); - initPool(); - } - private void initPool() { for (int i = 0; i < mPrefMaxSuggestions; i++) { StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); @@ -154,7 +148,7 @@ public class Suggest implements Dictionary.WordCallback { public void setContactsDictionary(Dictionary userDictionary) { mContactsDictionary = userDictionary; } - + public void setAutoDictionary(Dictionary autoDictionary) { mAutoDictionary = autoDictionary; } @@ -232,7 +226,7 @@ public class Suggest implements Dictionary.WordCallback { if (!TextUtils.isEmpty(prevWordForBigram)) { CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); - if (mMainDict.isValidWord(lowerPrevWord)) { + if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { prevWordForBigram = lowerPrevWord; } if (mUserBigramDictionary != null) { @@ -383,7 +377,7 @@ public class Suggest implements Dictionary.WordCallback { return mHaveCorrection; } - private boolean compareCaseInsensitive(final String mLowerOriginalWord, + private boolean compareCaseInsensitive(final String mLowerOriginalWord, final char[] word, final int offset, final int length) { final int originalLength = mLowerOriginalWord.length(); if (originalLength == length && Character.isUpperCase(word[offset])) { @@ -456,7 +450,7 @@ public class Suggest implements Dictionary.WordCallback { System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); priorities[pos] = freq; int poolSize = mStringPool.size(); - StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) + StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) : new StringBuilder(getApproxMaxWordLength()); sb.setLength(0); if (mIsAllUpperCase) { @@ -510,7 +504,7 @@ public class Suggest implements Dictionary.WordCallback { || (mAutoDictionary != null && mAutoDictionary.isValidWord(word)) || (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); } - + private void collectGarbage(ArrayList suggestions, int prefMaxSuggestions) { int poolSize = mStringPool.size(); int garbageSize = suggestions.size(); @@ -531,6 +525,23 @@ public class Suggest implements Dictionary.WordCallback { public void close() { if (mMainDict != null) { mMainDict.close(); + mMainDict = null; + } + if (mUserDictionary != null) { + mUserDictionary.close(); + mUserDictionary = null; + } + if (mUserBigramDictionary != null) { + mUserBigramDictionary.close(); + mUserBigramDictionary = null; + } + if (mContactsDictionary != null) { + mContactsDictionary.close(); + mContactsDictionary = null; + } + if (mAutoDictionary != null) { + mAutoDictionary.close(); + mAutoDictionary = null; } } } diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 9948448f7..637429298 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -15,12 +15,24 @@ ** limitations under the License. */ +#define LOG_TAG "LatinIME: jni" + #include "dictionary.h" #include "jni.h" #include +#include #include +#ifdef USE_MMAP_FOR_DICTIONARY +#include +#include +#include +#include +#else // USE_MMAP_FOR_DICTIONARY +#include +#endif // USE_MMAP_FOR_DICTIONARY + // ---------------------------------------------------------------------------- using namespace latinime; @@ -37,24 +49,84 @@ static void throwException(JNIEnv *env, const char* ex, const char* fmt, int dat } } -static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object, jobject dictDirectBuffer, +static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object, + jstring apkFileName, jlong dictOffset, jlong dictSize, jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords, jint maxAlternatives) { - void *dict = env->GetDirectBufferAddress(dictDirectBuffer); - if (dict == NULL) { - fprintf(stderr, "DICT: Dictionary buffer is null\n"); + PROF_OPEN; + PROF_START(66); + const char *apkFileNameChars = env->GetStringUTFChars(apkFileName, NULL); + if (apkFileNameChars == NULL) { + LOGE("DICT: Can't get apk file name"); return 0; } - Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier, - maxWordLength, maxWords, maxAlternatives); - return (jint) dictionary; + int fd = 0; + void *dictBuf = NULL; + int adjust = 0; +#ifdef USE_MMAP_FOR_DICTIONARY + /* mmap version */ + fd = open(apkFileNameChars, O_RDONLY); + if (fd < 0) { + LOGE("DICT: Can't open apk file. errno=%d", errno); + return 0; + } + int pagesize = getpagesize(); + adjust = dictOffset % pagesize; + int adjDictOffset = dictOffset - adjust; + int adjDictSize = dictSize + adjust; + dictBuf = mmap(NULL, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); + if (dictBuf == MAP_FAILED) { + LOGE("DICT: Can't mmap dictionary file. errno=%d", errno); + return 0; + } + dictBuf = (void *)((char *)dictBuf + adjust); +#else // USE_MMAP_FOR_DICTIONARY + /* malloc version */ + FILE *file = NULL; + file = fopen(apkFileNameChars, "rb"); + if (file == NULL) { + LOGE("DICT: Can't fopen apk file. errno=%d", errno); + return 0; + } + dictBuf = malloc(sizeof(char) * dictSize); + if (dictBuf == NULL) { + LOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno); + return 0; + } + int ret = fseek(file, (long)dictOffset, SEEK_SET); + if (ret != 0) { + LOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno); + return 0; + } + ret = fread(dictBuf, sizeof(char) * dictSize, 1, file); + if (ret != 1) { + LOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno); + return 0; + } + ret = fclose(file); + if (ret != 0) { + LOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno); + return 0; + } +#endif // USE_MMAP_FOR_DICTIONARY + env->ReleaseStringUTFChars(apkFileName, apkFileNameChars); + + if (!dictBuf) { + LOGE("DICT: dictBuf is null"); + return 0; + } + Dictionary *dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, + fullWordMultiplier, maxWordLength, maxWords, maxAlternatives); + PROF_END(66); + PROF_CLOSE; + return (jint)dictionary; } static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jint dict, jintArray inputArray, jint arraySize, jcharArray outputArray, jintArray frequencyArray, jintArray nextLettersArray, jint nextLettersSize) { - Dictionary *dictionary = (Dictionary*) dict; - if (dictionary == NULL) return 0; + Dictionary *dictionary = (Dictionary*)dict; + if (!dictionary) return 0; int *frequencies = env->GetIntArrayElements(frequencyArray, NULL); int *inputCodes = env->GetIntArrayElements(inputArray, NULL); @@ -79,8 +151,8 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams, jint maxAlternatives) { - Dictionary *dictionary = (Dictionary*) dict; - if (dictionary == NULL) return 0; + Dictionary *dictionary = (Dictionary*)dict; + if (!dictionary) return 0; jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL); int *inputCodes = env->GetIntArrayElements(inputArray, NULL); @@ -99,11 +171,10 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin return count; } - static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength) { - Dictionary *dictionary = (Dictionary*) dict; - if (dictionary == NULL) return (jboolean) false; + Dictionary *dictionary = (Dictionary*)dict; + if (!dictionary) return (jboolean) false; jchar *word = env->GetCharArrayElements(wordArray, NULL); jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength); @@ -113,13 +184,30 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec } static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jint dict) { - delete (Dictionary*) dict; + Dictionary *dictionary = (Dictionary*)dict; + if (!dictionary) return; + void *dictBuf = dictionary->getDict(); + if (!dictBuf) return; +#ifdef USE_MMAP_FOR_DICTIONARY + int ret = munmap((void *)((char *)dictBuf - dictionary->getDictBufAdjust()), + dictionary->getDictSize() + dictionary->getDictBufAdjust()); + if (ret != 0) { + LOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); + } + ret = close(dictionary->getMmapFd()); + if (ret != 0) { + LOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno); + } +#else // USE_MMAP_FOR_DICTIONARY + free(dictBuf); +#endif // USE_MMAP_FOR_DICTIONARY + delete dictionary; } // ---------------------------------------------------------------------------- static JNINativeMethod gMethods[] = { - {"openNative", "(Ljava/nio/ByteBuffer;IIIII)I", (void*)latinime_BinaryDictionary_open}, + {"openNative", "(Ljava/lang/String;JJIIIII)I", (void*)latinime_BinaryDictionary_open}, {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(I[II[C[I[II)I", (void*)latinime_BinaryDictionary_getSuggestions}, {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, @@ -132,11 +220,11 @@ static int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMe clazz = env->FindClass(className); if (clazz == NULL) { - fprintf(stderr, "Native registration unable to find class '%s'\n", className); + LOGE("Native registration unable to find class '%s'", className); return JNI_FALSE; } if (env->RegisterNatives(clazz, gMethods, numMethods) < 0) { - fprintf(stderr, "RegisterNatives failed for '%s'\n", className); + LOGE("RegisterNatives failed for '%s'", className); return JNI_FALSE; } @@ -157,13 +245,13 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { jint result = -1; if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { - fprintf(stderr, "ERROR: GetEnv failed\n"); + LOGE("ERROR: GetEnv failed"); goto bail; } assert(env != NULL); if (!registerNatives(env)) { - fprintf(stderr, "ERROR: BinaryDictionary native registration failed\n"); + LOGE("ERROR: BinaryDictionary native registration failed"); goto bail; } diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp index eebd69b71..5ec310f07 100644 --- a/native/src/bigram_dictionary.cpp +++ b/native/src/bigram_dictionary.cpp @@ -31,7 +31,7 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion), HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) { if (DEBUG_DICT) LOGI("BigramDictionary - constructor"); - if (DEBUG_DICT) LOGI("Has Bigram : %d \n", hasBigram); + if (DEBUG_DICT) LOGI("Has Bigram : %d", hasBigram); } BigramDictionary::~BigramDictionary() { @@ -42,7 +42,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ if (DEBUG_DICT) { char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency); + LOGI("Bigram: Found word = %s, freq = %d :", s, frequency); } // Find the right insertion point @@ -54,7 +54,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ } insertAt++; } - if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams); + if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams); if (insertAt < mMaxBigrams) { memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]), (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]), @@ -68,7 +68,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ *dest++ = *word++; } *dest = 0; // NULL terminate - if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt); + if (DEBUG_DICT) LOGI("Bigram: Added word at %d", insertAt); return true; } return false; @@ -107,7 +107,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { int pos = mParentDictionary->isValidWordRec( DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); - if (DEBUG_DICT) LOGI("Pos -> %d\n", pos); + if (DEBUG_DICT) LOGI("Pos -> %d", pos); if (pos < 0) { return 0; } @@ -151,7 +151,7 @@ void BigramDictionary::searchForTerminalNode(int addressLookingFor, int frequenc } pos = followDownBranchAddress; // pos start at count int count = DICT[pos] & 0xFF; - if (DEBUG_DICT) LOGI("count - %d\n",count); + if (DEBUG_DICT) LOGI("count - %d",count); pos++; for (int i = 0; i < count; i++) { // pos at data diff --git a/native/src/defines.h b/native/src/defines.h index 59eaa4102..71aaf28ae 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -36,45 +36,47 @@ static double profile_buf[PROF_BUF_SIZE]; static double profile_old[PROF_BUF_SIZE]; static unsigned int profile_counter[PROF_BUF_SIZE]; -#define PROF_RESET prof_reset(); -#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]; -#define PROF_OPEN PROF_RESET;PROF_START(PROF_BUF_SIZE - 1); -#define PROF_START(prof_buf_id) PROF_COUNT(prof_buf_id);profile_old[prof_buf_id] = (clock()); -#define PROF_CLOSE PROF_END(PROF_BUF_SIZE - 1);PROF_OUTALL; -#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]); -#define PROF_CLOCKOUT(prof_buf_id) LOGI("%s : clock is %f", __FUNCTION__,\ - (clock() - profile_old[prof_buf_id])); -#define PROF_OUTALL LOGI("--- %s ---", __FUNCTION__); prof_out(); +#define PROF_RESET prof_reset() +#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] +#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while(0) +#define PROF_START(prof_buf_id) do { \ + PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while(0) +#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0) +#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) +#define PROF_CLOCKOUT(prof_buf_id) \ + LOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) +#define PROF_OUTALL do { LOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0) -static void prof_reset(void){ - for(int i = 0;i < PROF_BUF_SIZE;++i){ +static void prof_reset(void) { + for (int i = 0; i < PROF_BUF_SIZE; ++i) { profile_buf[i] = 0; profile_old[i] = 0; profile_counter[i] = 0; } } -static void prof_out(void){ +static void prof_out(void) { if (profile_counter[PROF_BUF_SIZE - 1] != 1) { LOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); } LOGI("Total time is %6.3f ms.", - profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double) CLOCKS_PER_SEC); + profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double)CLOCKS_PER_SEC); double all = 0; - for(int i = 0; i < PROF_BUF_SIZE - 1; ++i){ + for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { all += profile_buf[i]; } - if(all == 0) all = 1; - for(int i = 0; i < PROF_BUF_SIZE - 1; ++i){ - if(profile_buf[i] != 0) { + if (all == 0) all = 1; + for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { + if (profile_buf[i] != 0) { LOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", - i, (profile_buf[i] * 100 /all), - profile_buf[i] * 1000 / (double) CLOCKS_PER_SEC, profile_counter[i]); - } + i, (profile_buf[i] * 100 / all), + profile_buf[i] * 1000 / (double)CLOCKS_PER_SEC, profile_counter[i]); + } } } #else // FLAG_DBG +#define LOGE #define LOGI #define DEBUG_DICT false #define DEBUG_DICT_FULL false @@ -99,6 +101,11 @@ static void prof_out(void){ #define U_SHORT_MAX 1 << 16 #endif +// Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap(). +// We measured and compared performance of both, and found mmap() is fairly good in terms of +// loading time, and acceptable even for several initial lookups which involve page faults. +#define USE_MMAP_FOR_DICTIONARY + // 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words #define ADDRESS_MASK 0x3FFFFF diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index 8d3290945..fe3375706 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -23,21 +23,23 @@ namespace latinime { -Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier, +Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, + int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives) - : DICT((unsigned char*) dict), + : mDict((unsigned char*) dict), mDictSize(dictSize), + mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), // Checks whether it has the latest dictionary or the old dictionary IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) { if (DEBUG_DICT) { if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { LOGI("Max word length (%d) is greater than %d", maxWordLength, MAX_WORD_LENGTH_INTERNAL); - LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF)); + LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); } } - mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier, + mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); - mBigramDictionary = new BigramDictionary(DICT, maxWordLength, maxAlternatives, + mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives, IS_LATEST_DICT_VERSION, hasBigram(), this); } @@ -47,7 +49,7 @@ Dictionary::~Dictionary() { } bool Dictionary::hasBigram() { - return ((DICT[1] & 0xFF) == 1); + return ((mDict[1] & 0xFF) == 1); } // TODO: use uint16_t instead of unsigned short @@ -64,12 +66,12 @@ int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int le // returns address of bigram data of that word // return -99 if not found - int count = Dictionary::getCount(DICT, &pos); + int count = Dictionary::getCount(mDict, &pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { - unsigned short c = Dictionary::getChar(DICT, &pos); - int terminal = Dictionary::getTerminal(DICT, &pos); - int childPos = Dictionary::getAddress(DICT, &pos); + unsigned short c = Dictionary::getChar(mDict, &pos); + int terminal = Dictionary::getTerminal(mDict, &pos); + int childPos = Dictionary::getAddress(mDict, &pos); if (c == currentChar) { if (offset == length - 1) { if (terminal) { @@ -85,7 +87,7 @@ int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int le } } if (terminal) { - Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); + Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos); } // There could be two instances of each alphabet - upper and lower case. So continue // looking ... diff --git a/native/src/dictionary.h b/native/src/dictionary.h index da876242d..cef1cf9eb 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -25,8 +25,8 @@ namespace latinime { class Dictionary { public: - Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, - int maxWords, int maxAlternatives); + Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler, + int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives); int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, int *nextLetters, int nextLettersSize) { return mUnigramDictionary->getSuggestions(codes, codesSize, outWords, frequencies, @@ -42,8 +42,10 @@ public: } bool isValidWord(unsigned short *word, int length); int isValidWordRec(int pos, unsigned short *word, int offset, int length); - void setAsset(void *asset) { mAsset = asset; } - void *getAsset() { return mAsset; } + void *getDict() { return (void *)mDict; } + int getDictSize() { return mDictSize; } + int getMmapFd() { return mMmapFd; } + int getDictBufAdjust() { return mDictBufAdjust; } ~Dictionary(); // public static utility methods @@ -62,11 +64,17 @@ public: private: bool hasBigram(); - const unsigned char *DICT; + const unsigned char *mDict; + + // Used only for the mmap version of dictionary loading, but we use these as dummy variables + // also for the malloc version. + const int mDictSize; + const int mMmapFd; + const int mDictBufAdjust; + const bool IS_LATEST_DICT_VERSION; - void *mAsset; - BigramDictionary *mBigramDictionary; UnigramDictionary *mUnigramDictionary; + BigramDictionary *mBigramDictionary; }; // ---------------------------------------------------------------------------- diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index af2cc97fc..3f9bcd758 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -113,7 +113,6 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short LOGI("%c = %d,", k, nextLetters[k]); } } - LOGI("\n"); } PROF_END(6); PROF_CLOSE; diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 445ff7a17..7f7b7bd21 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -80,13 +80,13 @@ private: bool existsAdjacentProximityChars(const int inputIndex, const int inputLength); int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);} const unsigned char *DICT; - const int MAX_WORDS; const int MAX_WORD_LENGTH; + const int MAX_WORDS; const int MAX_PROXIMITY_CHARS; const bool IS_LATEST_DICT_VERSION; - const int ROOT_POS; const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; + const int ROOT_POS; int *mFrequencies; unsigned short *mOutputChars; diff --git a/tests/src/com/android/inputmethod/latin/SuggestHelper.java b/tests/src/com/android/inputmethod/latin/SuggestHelper.java index 7254520d5..de898c3ea 100644 --- a/tests/src/com/android/inputmethod/latin/SuggestHelper.java +++ b/tests/src/com/android/inputmethod/latin/SuggestHelper.java @@ -38,49 +38,15 @@ public class SuggestHelper { private final String TAG; /** Uses main dictionary only **/ - public SuggestHelper(String tag, Context context, int[] resId) { + public SuggestHelper(String tag, Context context, int resId) { TAG = tag; - InputStream[] is = null; - try { - // merging separated dictionary into one if dictionary is separated - int total = 0; - is = new InputStream[resId.length]; - for (int i = 0; i < resId.length; i++) { - is[i] = context.getResources().openRawResource(resId[i]); - total += is[i].available(); - } - - ByteBuffer byteBuffer = - ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder()); - int got = 0; - for (int i = 0; i < resId.length; i++) { - got += Channels.newChannel(is[i]).read(byteBuffer); - } - if (got != total) { - Log.w(TAG, "Read " + got + " bytes, expected " + total); - } else { - mSuggest = new Suggest(context, byteBuffer); - Log.i(TAG, "Created mSuggest " + total + " bytes"); - } - } catch (IOException e) { - Log.w(TAG, "No available memory for binary dictionary"); - } finally { - try { - if (is != null) { - for (int i = 0; i < is.length; i++) { - is[i].close(); - } - } - } catch (IOException e) { - Log.w(TAG, "Failed to close input stream"); - } - } + mSuggest = new Suggest(context, resId); mSuggest.setAutoTextEnabled(false); mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM); } /** Uses both main dictionary and user-bigram dictionary **/ - public SuggestHelper(String tag, Context context, int[] resId, int userBigramMax, + public SuggestHelper(String tag, Context context, int resId, int userBigramMax, int userBigramDelete) { this(tag, context, resId); mUserBigram = new UserBigramDictionary(context, null, Locale.US.toString(), diff --git a/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java b/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java index 7eb66d502..c5913ab4f 100644 --- a/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java @@ -36,9 +36,9 @@ public class SuggestPerformanceTests extends AndroidTestCase { // For testing with real dictionary, TEMPORARILY COPY main dictionary into test directory. // DO NOT SUBMIT real dictionary under test directory. - //int[] resId = new int[] { R.raw.main0, R.raw.main1, R.raw.main2 }; + //int resId = R.raw.main; - int[] resId = new int[] { R.raw.test }; + int resId = R.raw.test; sh = new SuggestHelper(TAG, getTestContext(), resId); loadString(); diff --git a/tests/src/com/android/inputmethod/latin/SuggestTests.java b/tests/src/com/android/inputmethod/latin/SuggestTests.java index 33462dccf..c890394d0 100644 --- a/tests/src/com/android/inputmethod/latin/SuggestTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestTests.java @@ -26,7 +26,7 @@ public class SuggestTests extends AndroidTestCase { @Override protected void setUp() { - int[] resId = new int[] { R.raw.test }; + int resId = R.raw.test; sh = new SuggestHelper(TAG, getTestContext(), resId); } diff --git a/tests/src/com/android/inputmethod/latin/UserBigramTests.java b/tests/src/com/android/inputmethod/latin/UserBigramTests.java index cbf7bd8e1..af527b02d 100644 --- a/tests/src/com/android/inputmethod/latin/UserBigramTests.java +++ b/tests/src/com/android/inputmethod/latin/UserBigramTests.java @@ -31,7 +31,7 @@ public class UserBigramTests extends AndroidTestCase { @Override protected void setUp() { - int[] resId = new int[] { R.raw.test }; + int resId = R.raw.test; sh = new SuggestHelper(TAG, getTestContext(), resId, MAX_DATA, DELETE_DATA); }