Load main dic in native

Follow up to Id57dce51

bug: 3219819
Change-Id: I00e11ef21d0252ffa88c12dffb9c55b0f2e19a66
main
Ken Wakasa 2011-01-07 15:01:51 +09:00
parent f16028b92e
commit e90b333017
14 changed files with 232 additions and 204 deletions

View File

@ -1,12 +1,12 @@
/* /*
* Copyright (C) 2008 The Android Open Source Project * Copyright (C) 2008 The Android Open Source Project
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not * Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of * use this file except in compliance with the License. You may obtain a copy of
* the License at * the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@ -17,13 +17,9 @@
package com.android.inputmethod.latin; package com.android.inputmethod.latin;
import android.content.Context; import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.util.Log; import android.util.Log;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.util.Arrays; import java.util.Arrays;
/** /**
@ -48,15 +44,12 @@ public class BinaryDictionary extends Dictionary {
private int mDicTypeId; private int mDicTypeId;
private int mNativeDict; private int mNativeDict;
private int mDictLength; private long mDictLength;
private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES]; private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
private final int[] mFrequencies = new int[MAX_WORDS]; private final int[] mFrequencies = new int[MAX_WORDS];
private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS]; private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
// Keep a reference to the native dict direct buffer in Java to avoid
// unexpected deallocation of the direct buffer.
private ByteBuffer mNativeDictDirectBuffer;
static { static {
try { try {
@ -78,30 +71,9 @@ public class BinaryDictionary extends Dictionary {
mDicTypeId = dicTypeId; mDicTypeId = dicTypeId;
} }
/** private native int openNative(String apkFileName, long dictOffset, long dictSize,
* Create a dictionary from a byte buffer. This is used for testing. int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength,
* @param context application context for reading resources int maxWords, int maxAlternatives);
* @param byteBuffer a ByteBuffer containing the binary dictionary
*/
public BinaryDictionary(Context context, ByteBuffer byteBuffer, int dicTypeId) {
if (byteBuffer != null) {
if (byteBuffer.isDirect()) {
mNativeDictDirectBuffer = byteBuffer;
} else {
mNativeDictDirectBuffer = ByteBuffer.allocateDirect(byteBuffer.capacity());
byteBuffer.rewind();
mNativeDictDirectBuffer.put(byteBuffer);
}
mDictLength = byteBuffer.capacity();
mNativeDict = openNative(mNativeDictDirectBuffer,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
}
mDicTypeId = dicTypeId;
}
private native int openNative(ByteBuffer bb, int typedLetterMultiplier,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
private native void closeNative(int dict); private native void closeNative(int dict);
private native boolean isValidWordNative(int nativeData, char[] word, int wordLength); private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize, private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize,
@ -112,37 +84,17 @@ public class BinaryDictionary extends Dictionary {
int maxWordLength, int maxBigrams, int maxAlternatives); int maxWordLength, int maxBigrams, int maxAlternatives);
private final void loadDictionary(Context context, int resId) { private final void loadDictionary(Context context, int resId) {
InputStream is = null; final AssetFileDescriptor afd = context.getResources().openRawResourceFd(resId);
try { mNativeDict = openNative(context.getApplicationInfo().sourceDir,
is = context.getResources().openRawResource(resId); afd.getStartOffset(), afd.getLength(),
final int total = is.available(); TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
mNativeDictDirectBuffer = MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder()); mDictLength = afd.getLength();
final int got = Channels.newChannel(is).read(mNativeDictDirectBuffer);
if (got != total) {
Log.e(TAG, "Read " + got + " bytes, expected " + total);
} else {
mNativeDict = openNative(mNativeDictDirectBuffer,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
mDictLength = total;
}
} catch (IOException e) {
Log.w(TAG, "No available memory for binary dictionary");
} finally {
try {
if (is != null) is.close();
} catch (IOException e) {
Log.w(TAG, "Failed to close input stream");
}
}
} }
@Override @Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord, public void getBigrams(final WordComposer codes, final CharSequence previousWord,
final WordCallback callback, int[] nextLettersFrequencies) { final WordCallback callback, int[] nextLettersFrequencies) {
char[] chars = previousWord.toString().toCharArray(); char[] chars = previousWord.toString().toCharArray();
Arrays.fill(mOutputChars_bigrams, (char) 0); Arrays.fill(mOutputChars_bigrams, (char) 0);
Arrays.fill(mFrequencies_bigrams, 0); Arrays.fill(mFrequencies_bigrams, 0);
@ -212,7 +164,7 @@ public class BinaryDictionary extends Dictionary {
return isValidWordNative(mNativeDict, chars, chars.length); return isValidWordNative(mNativeDict, chars, chars.length);
} }
public int getSize() { public long getSize() {
return mDictLength; // This value is initialized on the call to openNative() return mDictLength; // This value is initialized on the call to openNative()
} }
@ -221,6 +173,7 @@ public class BinaryDictionary extends Dictionary {
if (mNativeDict != 0) { if (mNativeDict != 0) {
closeNative(mNativeDict); closeNative(mNativeDict);
mNativeDict = 0; mNativeDict = 0;
mDictLength = 0;
} }
} }

View File

@ -371,23 +371,19 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
int mainDicResId = getMainDictionaryResourceId(res); int mainDicResId = getMainDictionaryResourceId(res);
mSuggest = new Suggest(this, mainDicResId); mSuggest = new Suggest(this, mainDicResId);
loadAndSetAutoCorrectionThreshold(prefs); loadAndSetAutoCorrectionThreshold(prefs);
if (mUserDictionary != null) mUserDictionary.close();
mUserDictionary = new UserDictionary(this, locale); mUserDictionary = new UserDictionary(this, locale);
if (mContactsDictionary == null) { mSuggest.setUserDictionary(mUserDictionary);
mContactsDictionary = new ContactsDictionary(this, Suggest.DIC_CONTACTS);
} mContactsDictionary = new ContactsDictionary(this, Suggest.DIC_CONTACTS);
if (mAutoDictionary != null) { mSuggest.setContactsDictionary(mContactsDictionary);
mAutoDictionary.close();
}
mAutoDictionary = new AutoDictionary(this, this, locale, Suggest.DIC_AUTO); mAutoDictionary = new AutoDictionary(this, this, locale, Suggest.DIC_AUTO);
if (mUserBigramDictionary != null) { mSuggest.setAutoDictionary(mAutoDictionary);
mUserBigramDictionary.close();
}
mUserBigramDictionary = new UserBigramDictionary(this, this, locale, Suggest.DIC_USER); mUserBigramDictionary = new UserBigramDictionary(this, this, locale, Suggest.DIC_USER);
mSuggest.setUserBigramDictionary(mUserBigramDictionary); mSuggest.setUserBigramDictionary(mUserBigramDictionary);
mSuggest.setUserDictionary(mUserDictionary);
mSuggest.setContactsDictionary(mContactsDictionary);
mSuggest.setAutoDictionary(mAutoDictionary);
updateCorrectionMode(); updateCorrectionMode();
mWordSeparators = res.getString(R.string.word_separators); mWordSeparators = res.getString(R.string.word_separators);
mSentenceSeparators = res.getString(R.string.sentence_separators); mSentenceSeparators = res.getString(R.string.sentence_separators);
@ -397,11 +393,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
@Override @Override
public void onDestroy() { public void onDestroy() {
if (mUserDictionary != null) { if (mSuggest != null) {
mUserDictionary.close(); mSuggest.close();
} mSuggest = null;
if (mContactsDictionary != null) {
mContactsDictionary.close();
} }
unregisterReceiver(mReceiver); unregisterReceiver(mReceiver);
mVoiceConnector.destroy(); mVoiceConnector.destroy();

View File

@ -1,12 +1,12 @@
/* /*
* Copyright (C) 2008 The Android Open Source Project * Copyright (C) 2008 The Android Open Source Project
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not * Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of * use this file except in compliance with the License. You may obtain a copy of
* the License at * the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@ -22,12 +22,11 @@ import android.text.TextUtils;
import android.util.Log; import android.util.Log;
import android.view.View; import android.view.View;
import java.nio.ByteBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
/** /**
* This class loads a dictionary and provides a list of suggestions for a given sequence of * This class loads a dictionary and provides a list of suggestions for a given sequence of
* characters. This includes corrections and completions. * characters. This includes corrections and completions.
*/ */
public class Suggest implements Dictionary.WordCallback { public class Suggest implements Dictionary.WordCallback {
@ -108,11 +107,6 @@ public class Suggest implements Dictionary.WordCallback {
initPool(); initPool();
} }
public Suggest(Context context, ByteBuffer byteBuffer) {
mMainDict = new BinaryDictionary(context, byteBuffer, DIC_MAIN);
initPool();
}
private void initPool() { private void initPool() {
for (int i = 0; i < mPrefMaxSuggestions; i++) { for (int i = 0; i < mPrefMaxSuggestions; i++) {
StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
@ -154,7 +148,7 @@ public class Suggest implements Dictionary.WordCallback {
public void setContactsDictionary(Dictionary userDictionary) { public void setContactsDictionary(Dictionary userDictionary) {
mContactsDictionary = userDictionary; mContactsDictionary = userDictionary;
} }
public void setAutoDictionary(Dictionary autoDictionary) { public void setAutoDictionary(Dictionary autoDictionary) {
mAutoDictionary = autoDictionary; mAutoDictionary = autoDictionary;
} }
@ -232,7 +226,7 @@ public class Suggest implements Dictionary.WordCallback {
if (!TextUtils.isEmpty(prevWordForBigram)) { if (!TextUtils.isEmpty(prevWordForBigram)) {
CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
if (mMainDict.isValidWord(lowerPrevWord)) { if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
prevWordForBigram = lowerPrevWord; prevWordForBigram = lowerPrevWord;
} }
if (mUserBigramDictionary != null) { if (mUserBigramDictionary != null) {
@ -383,7 +377,7 @@ public class Suggest implements Dictionary.WordCallback {
return mHaveCorrection; return mHaveCorrection;
} }
private boolean compareCaseInsensitive(final String mLowerOriginalWord, private boolean compareCaseInsensitive(final String mLowerOriginalWord,
final char[] word, final int offset, final int length) { final char[] word, final int offset, final int length) {
final int originalLength = mLowerOriginalWord.length(); final int originalLength = mLowerOriginalWord.length();
if (originalLength == length && Character.isUpperCase(word[offset])) { if (originalLength == length && Character.isUpperCase(word[offset])) {
@ -456,7 +450,7 @@ public class Suggest implements Dictionary.WordCallback {
System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1);
priorities[pos] = freq; priorities[pos] = freq;
int poolSize = mStringPool.size(); int poolSize = mStringPool.size();
StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(getApproxMaxWordLength()); : new StringBuilder(getApproxMaxWordLength());
sb.setLength(0); sb.setLength(0);
if (mIsAllUpperCase) { if (mIsAllUpperCase) {
@ -510,7 +504,7 @@ public class Suggest implements Dictionary.WordCallback {
|| (mAutoDictionary != null && mAutoDictionary.isValidWord(word)) || (mAutoDictionary != null && mAutoDictionary.isValidWord(word))
|| (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
} }
private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
int poolSize = mStringPool.size(); int poolSize = mStringPool.size();
int garbageSize = suggestions.size(); int garbageSize = suggestions.size();
@ -531,6 +525,23 @@ public class Suggest implements Dictionary.WordCallback {
public void close() { public void close() {
if (mMainDict != null) { if (mMainDict != null) {
mMainDict.close(); mMainDict.close();
mMainDict = null;
}
if (mUserDictionary != null) {
mUserDictionary.close();
mUserDictionary = null;
}
if (mUserBigramDictionary != null) {
mUserBigramDictionary.close();
mUserBigramDictionary = null;
}
if (mContactsDictionary != null) {
mContactsDictionary.close();
mContactsDictionary = null;
}
if (mAutoDictionary != null) {
mAutoDictionary.close();
mAutoDictionary = null;
} }
} }
} }

View File

@ -15,12 +15,24 @@
** limitations under the License. ** limitations under the License.
*/ */
#define LOG_TAG "LatinIME: jni"
#include "dictionary.h" #include "dictionary.h"
#include "jni.h" #include "jni.h"
#include <assert.h> #include <assert.h>
#include <errno.h>
#include <stdio.h> #include <stdio.h>
#ifdef USE_MMAP_FOR_DICTIONARY
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#else // USE_MMAP_FOR_DICTIONARY
#include <stdlib.h>
#endif // USE_MMAP_FOR_DICTIONARY
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
using namespace latinime; using namespace latinime;
@ -37,24 +49,84 @@ static void throwException(JNIEnv *env, const char* ex, const char* fmt, int dat
} }
} }
static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object, jobject dictDirectBuffer, static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
jstring apkFileName, jlong dictOffset, jlong dictSize,
jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords, jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords,
jint maxAlternatives) { jint maxAlternatives) {
void *dict = env->GetDirectBufferAddress(dictDirectBuffer); PROF_OPEN;
if (dict == NULL) { PROF_START(66);
fprintf(stderr, "DICT: Dictionary buffer is null\n"); const char *apkFileNameChars = env->GetStringUTFChars(apkFileName, NULL);
if (apkFileNameChars == NULL) {
LOGE("DICT: Can't get apk file name");
return 0; return 0;
} }
Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier, int fd = 0;
maxWordLength, maxWords, maxAlternatives); void *dictBuf = NULL;
return (jint) dictionary; int adjust = 0;
#ifdef USE_MMAP_FOR_DICTIONARY
/* mmap version */
fd = open(apkFileNameChars, O_RDONLY);
if (fd < 0) {
LOGE("DICT: Can't open apk file. errno=%d", errno);
return 0;
}
int pagesize = getpagesize();
adjust = dictOffset % pagesize;
int adjDictOffset = dictOffset - adjust;
int adjDictSize = dictSize + adjust;
dictBuf = mmap(NULL, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
LOGE("DICT: Can't mmap dictionary file. errno=%d", errno);
return 0;
}
dictBuf = (void *)((char *)dictBuf + adjust);
#else // USE_MMAP_FOR_DICTIONARY
/* malloc version */
FILE *file = NULL;
file = fopen(apkFileNameChars, "rb");
if (file == NULL) {
LOGE("DICT: Can't fopen apk file. errno=%d", errno);
return 0;
}
dictBuf = malloc(sizeof(char) * dictSize);
if (dictBuf == NULL) {
LOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
return 0;
}
int ret = fseek(file, (long)dictOffset, SEEK_SET);
if (ret != 0) {
LOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
return 0;
}
ret = fread(dictBuf, sizeof(char) * dictSize, 1, file);
if (ret != 1) {
LOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
return 0;
}
ret = fclose(file);
if (ret != 0) {
LOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno);
return 0;
}
#endif // USE_MMAP_FOR_DICTIONARY
env->ReleaseStringUTFChars(apkFileName, apkFileNameChars);
if (!dictBuf) {
LOGE("DICT: dictBuf is null");
return 0;
}
Dictionary *dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier,
fullWordMultiplier, maxWordLength, maxWords, maxAlternatives);
PROF_END(66);
PROF_CLOSE;
return (jint)dictionary;
} }
static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jint dict, static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jint dict,
jintArray inputArray, jint arraySize, jcharArray outputArray, jintArray frequencyArray, jintArray inputArray, jint arraySize, jcharArray outputArray, jintArray frequencyArray,
jintArray nextLettersArray, jint nextLettersSize) { jintArray nextLettersArray, jint nextLettersSize) {
Dictionary *dictionary = (Dictionary*) dict; Dictionary *dictionary = (Dictionary*)dict;
if (dictionary == NULL) return 0; if (!dictionary) return 0;
int *frequencies = env->GetIntArrayElements(frequencyArray, NULL); int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
int *inputCodes = env->GetIntArrayElements(inputArray, NULL); int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
@ -79,8 +151,8 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin
jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams, jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams,
jint maxAlternatives) { jint maxAlternatives) {
Dictionary *dictionary = (Dictionary*) dict; Dictionary *dictionary = (Dictionary*)dict;
if (dictionary == NULL) return 0; if (!dictionary) return 0;
jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL); jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL);
int *inputCodes = env->GetIntArrayElements(inputArray, NULL); int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
@ -99,11 +171,10 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin
return count; return count;
} }
static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jint dict, static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jint dict,
jcharArray wordArray, jint wordLength) { jcharArray wordArray, jint wordLength) {
Dictionary *dictionary = (Dictionary*) dict; Dictionary *dictionary = (Dictionary*)dict;
if (dictionary == NULL) return (jboolean) false; if (!dictionary) return (jboolean) false;
jchar *word = env->GetCharArrayElements(wordArray, NULL); jchar *word = env->GetCharArrayElements(wordArray, NULL);
jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength); jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
@ -113,13 +184,30 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
} }
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jint dict) { static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jint dict) {
delete (Dictionary*) dict; Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return;
void *dictBuf = dictionary->getDict();
if (!dictBuf) return;
#ifdef USE_MMAP_FOR_DICTIONARY
int ret = munmap((void *)((char *)dictBuf - dictionary->getDictBufAdjust()),
dictionary->getDictSize() + dictionary->getDictBufAdjust());
if (ret != 0) {
LOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
ret = close(dictionary->getMmapFd());
if (ret != 0) {
LOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
#else // USE_MMAP_FOR_DICTIONARY
free(dictBuf);
#endif // USE_MMAP_FOR_DICTIONARY
delete dictionary;
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
static JNINativeMethod gMethods[] = { static JNINativeMethod gMethods[] = {
{"openNative", "(Ljava/nio/ByteBuffer;IIIII)I", (void*)latinime_BinaryDictionary_open}, {"openNative", "(Ljava/lang/String;JJIIIII)I", (void*)latinime_BinaryDictionary_open},
{"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close}, {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(I[II[C[I[II)I", (void*)latinime_BinaryDictionary_getSuggestions}, {"getSuggestionsNative", "(I[II[C[I[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
@ -132,11 +220,11 @@ static int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMe
clazz = env->FindClass(className); clazz = env->FindClass(className);
if (clazz == NULL) { if (clazz == NULL) {
fprintf(stderr, "Native registration unable to find class '%s'\n", className); LOGE("Native registration unable to find class '%s'", className);
return JNI_FALSE; return JNI_FALSE;
} }
if (env->RegisterNatives(clazz, gMethods, numMethods) < 0) { if (env->RegisterNatives(clazz, gMethods, numMethods) < 0) {
fprintf(stderr, "RegisterNatives failed for '%s'\n", className); LOGE("RegisterNatives failed for '%s'", className);
return JNI_FALSE; return JNI_FALSE;
} }
@ -157,13 +245,13 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
jint result = -1; jint result = -1;
if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
fprintf(stderr, "ERROR: GetEnv failed\n"); LOGE("ERROR: GetEnv failed");
goto bail; goto bail;
} }
assert(env != NULL); assert(env != NULL);
if (!registerNatives(env)) { if (!registerNatives(env)) {
fprintf(stderr, "ERROR: BinaryDictionary native registration failed\n"); LOGE("ERROR: BinaryDictionary native registration failed");
goto bail; goto bail;
} }

View File

@ -31,7 +31,7 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion), MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) { HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
if (DEBUG_DICT) LOGI("BigramDictionary - constructor"); if (DEBUG_DICT) LOGI("BigramDictionary - constructor");
if (DEBUG_DICT) LOGI("Has Bigram : %d \n", hasBigram); if (DEBUG_DICT) LOGI("Has Bigram : %d", hasBigram);
} }
BigramDictionary::~BigramDictionary() { BigramDictionary::~BigramDictionary() {
@ -42,7 +42,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
if (DEBUG_DICT) { if (DEBUG_DICT) {
char s[length + 1]; char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i]; for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Bigram: Found word = %s, freq = %d : \n", s, frequency); LOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
} }
// Find the right insertion point // Find the right insertion point
@ -54,7 +54,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
} }
insertAt++; insertAt++;
} }
if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams); if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
if (insertAt < mMaxBigrams) { if (insertAt < mMaxBigrams) {
memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]), memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
(char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]), (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
@ -68,7 +68,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
*dest++ = *word++; *dest++ = *word++;
} }
*dest = 0; // NULL terminate *dest = 0; // NULL terminate
if (DEBUG_DICT) LOGI("Bigram: Added word at %d\n", insertAt); if (DEBUG_DICT) LOGI("Bigram: Added word at %d", insertAt);
return true; return true;
} }
return false; return false;
@ -107,7 +107,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) {
int pos = mParentDictionary->isValidWordRec( int pos = mParentDictionary->isValidWordRec(
DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
if (DEBUG_DICT) LOGI("Pos -> %d\n", pos); if (DEBUG_DICT) LOGI("Pos -> %d", pos);
if (pos < 0) { if (pos < 0) {
return 0; return 0;
} }
@ -151,7 +151,7 @@ void BigramDictionary::searchForTerminalNode(int addressLookingFor, int frequenc
} }
pos = followDownBranchAddress; // pos start at count pos = followDownBranchAddress; // pos start at count
int count = DICT[pos] & 0xFF; int count = DICT[pos] & 0xFF;
if (DEBUG_DICT) LOGI("count - %d\n",count); if (DEBUG_DICT) LOGI("count - %d",count);
pos++; pos++;
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
// pos at data // pos at data

View File

@ -36,45 +36,47 @@ static double profile_buf[PROF_BUF_SIZE];
static double profile_old[PROF_BUF_SIZE]; static double profile_old[PROF_BUF_SIZE];
static unsigned int profile_counter[PROF_BUF_SIZE]; static unsigned int profile_counter[PROF_BUF_SIZE];
#define PROF_RESET prof_reset(); #define PROF_RESET prof_reset()
#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]; #define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]
#define PROF_OPEN PROF_RESET;PROF_START(PROF_BUF_SIZE - 1); #define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while(0)
#define PROF_START(prof_buf_id) PROF_COUNT(prof_buf_id);profile_old[prof_buf_id] = (clock()); #define PROF_START(prof_buf_id) do { \
#define PROF_CLOSE PROF_END(PROF_BUF_SIZE - 1);PROF_OUTALL; PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while(0)
#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]); #define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0)
#define PROF_CLOCKOUT(prof_buf_id) LOGI("%s : clock is %f", __FUNCTION__,\ #define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
(clock() - profile_old[prof_buf_id])); #define PROF_CLOCKOUT(prof_buf_id) \
#define PROF_OUTALL LOGI("--- %s ---", __FUNCTION__); prof_out(); LOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
#define PROF_OUTALL do { LOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
static void prof_reset(void){ static void prof_reset(void) {
for(int i = 0;i < PROF_BUF_SIZE;++i){ for (int i = 0; i < PROF_BUF_SIZE; ++i) {
profile_buf[i] = 0; profile_buf[i] = 0;
profile_old[i] = 0; profile_old[i] = 0;
profile_counter[i] = 0; profile_counter[i] = 0;
} }
} }
static void prof_out(void){ static void prof_out(void) {
if (profile_counter[PROF_BUF_SIZE - 1] != 1) { if (profile_counter[PROF_BUF_SIZE - 1] != 1) {
LOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); LOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
} }
LOGI("Total time is %6.3f ms.", LOGI("Total time is %6.3f ms.",
profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double) CLOCKS_PER_SEC); profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double)CLOCKS_PER_SEC);
double all = 0; double all = 0;
for(int i = 0; i < PROF_BUF_SIZE - 1; ++i){ for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
all += profile_buf[i]; all += profile_buf[i];
} }
if(all == 0) all = 1; if (all == 0) all = 1;
for(int i = 0; i < PROF_BUF_SIZE - 1; ++i){ for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
if(profile_buf[i] != 0) { if (profile_buf[i] != 0) {
LOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", LOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
i, (profile_buf[i] * 100 /all), i, (profile_buf[i] * 100 / all),
profile_buf[i] * 1000 / (double) CLOCKS_PER_SEC, profile_counter[i]); profile_buf[i] * 1000 / (double)CLOCKS_PER_SEC, profile_counter[i]);
} }
} }
} }
#else // FLAG_DBG #else // FLAG_DBG
#define LOGE
#define LOGI #define LOGI
#define DEBUG_DICT false #define DEBUG_DICT false
#define DEBUG_DICT_FULL false #define DEBUG_DICT_FULL false
@ -99,6 +101,11 @@ static void prof_out(void){
#define U_SHORT_MAX 1 << 16 #define U_SHORT_MAX 1 << 16
#endif #endif
// Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap().
// We measured and compared performance of both, and found mmap() is fairly good in terms of
// loading time, and acceptable even for several initial lookups which involve page faults.
#define USE_MMAP_FOR_DICTIONARY
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words // 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
#define ADDRESS_MASK 0x3FFFFF #define ADDRESS_MASK 0x3FFFFF

View File

@ -23,21 +23,23 @@
namespace latinime { namespace latinime {
// Constructs a Dictionary over the raw dictionary buffer `dict`.
// The buffer pointer is stored as an unsigned char pointer; the longer signature additionally
// records dictSize, mmapFd and dictBufAdjust in mDictSize, mMmapFd and mDictBufAdjust.
// IS_LATEST_DICT_VERSION is set by comparing the first byte of the buffer against
// DICTIONARY_VERSION_MIN. The body then allocates the UnigramDictionary and BigramDictionary
// that operate on the same buffer.
Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier, Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
int typedLetterMultiplier, int fullWordMultiplier,
int maxWordLength, int maxWords, int maxAlternatives) int maxWordLength, int maxWords, int maxAlternatives)
: DICT((unsigned char*) dict), : mDict((unsigned char*) dict), mDictSize(dictSize),
mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
// Checks whether it has the latest dictionary or the old dictionary // Checks whether it has the latest dictionary or the old dictionary
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) { IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
// Debug-only diagnostics: warn when the caller's maxWordLength exceeds the internal limit.
if (DEBUG_DICT) { if (DEBUG_DICT) {
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
LOGI("Max word length (%d) is greater than %d", LOGI("Max word length (%d) is greater than %d",
maxWordLength, MAX_WORD_LENGTH_INTERNAL); maxWordLength, MAX_WORD_LENGTH_INTERNAL);
// NOTE(review): this version log sits inside the max-word-length check, so it is only
// printed when that warning fires - confirm that is intended.
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF)); LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
} }
} }
// hasBigram() reads the buffer, so the buffer member must already be initialized here.
mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier, mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION); maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
mBigramDictionary = new BigramDictionary(DICT, maxWordLength, maxAlternatives, mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
IS_LATEST_DICT_VERSION, hasBigram(), this); IS_LATEST_DICT_VERSION, hasBigram(), this);
} }
@ -47,7 +49,7 @@ Dictionary::~Dictionary() {
} }
// Returns true when the byte at index 1 of the dictionary buffer equals 1.
// NOTE(review): presumably that byte is a header flag marking the presence of bigram data -
// confirm against the dictionary format.
bool Dictionary::hasBigram() { bool Dictionary::hasBigram() {
return ((DICT[1] & 0xFF) == 1); return ((mDict[1] & 0xFF) == 1);
} }
// TODO: use uint16_t instead of unsigned short // TODO: use uint16_t instead of unsigned short
@ -64,12 +66,12 @@ int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int le
// returns address of bigram data of that word // returns address of bigram data of that word
// return -99 if not found // return -99 if not found
int count = Dictionary::getCount(DICT, &pos); int count = Dictionary::getCount(mDict, &pos);
unsigned short currentChar = (unsigned short) word[offset]; unsigned short currentChar = (unsigned short) word[offset];
for (int j = 0; j < count; j++) { for (int j = 0; j < count; j++) {
unsigned short c = Dictionary::getChar(DICT, &pos); unsigned short c = Dictionary::getChar(mDict, &pos);
int terminal = Dictionary::getTerminal(DICT, &pos); int terminal = Dictionary::getTerminal(mDict, &pos);
int childPos = Dictionary::getAddress(DICT, &pos); int childPos = Dictionary::getAddress(mDict, &pos);
if (c == currentChar) { if (c == currentChar) {
if (offset == length - 1) { if (offset == length - 1) {
if (terminal) { if (terminal) {
@ -85,7 +87,7 @@ int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int le
} }
} }
if (terminal) { if (terminal) {
Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos);
} }
// There could be two instances of each alphabet - upper and lower case. So continue // There could be two instances of each alphabet - upper and lower case. So continue
// looking ... // looking ...

View File

@ -25,8 +25,8 @@ namespace latinime {
class Dictionary { class Dictionary {
public: public:
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int maxWords, int maxAlternatives); int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int *nextLetters, int nextLettersSize) { int *nextLetters, int nextLettersSize) {
return mUnigramDictionary->getSuggestions(codes, codesSize, outWords, frequencies, return mUnigramDictionary->getSuggestions(codes, codesSize, outWords, frequencies,
@ -42,8 +42,10 @@ public:
} }
bool isValidWord(unsigned short *word, int length); bool isValidWord(unsigned short *word, int length);
int isValidWordRec(int pos, unsigned short *word, int offset, int length); int isValidWordRec(int pos, unsigned short *word, int offset, int length);
void setAsset(void *asset) { mAsset = asset; } void *getDict() { return (void *)mDict; }
void *getAsset() { return mAsset; } int getDictSize() { return mDictSize; }
int getMmapFd() { return mMmapFd; }
int getDictBufAdjust() { return mDictBufAdjust; }
~Dictionary(); ~Dictionary();
// public static utility methods // public static utility methods
@ -62,11 +64,17 @@ public:
private: private:
bool hasBigram(); bool hasBigram();
const unsigned char *DICT; const unsigned char *mDict;
// Used only for the mmap version of dictionary loading, but we use these as dummy variables
// also for the malloc version.
const int mDictSize;
const int mMmapFd;
const int mDictBufAdjust;
const bool IS_LATEST_DICT_VERSION; const bool IS_LATEST_DICT_VERSION;
void *mAsset;
BigramDictionary *mBigramDictionary;
UnigramDictionary *mUnigramDictionary; UnigramDictionary *mUnigramDictionary;
BigramDictionary *mBigramDictionary;
}; };
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------

View File

@ -113,7 +113,6 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
LOGI("%c = %d,", k, nextLetters[k]); LOGI("%c = %d,", k, nextLetters[k]);
} }
} }
LOGI("\n");
} }
PROF_END(6); PROF_END(6);
PROF_CLOSE; PROF_CLOSE;

View File

@ -80,13 +80,13 @@ private:
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength); bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);} int* getInputCharsAt(const int index) {return mInputCodes + (index * MAX_PROXIMITY_CHARS);}
const unsigned char *DICT; const unsigned char *DICT;
const int MAX_WORDS;
const int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;
const int MAX_WORDS;
const int MAX_PROXIMITY_CHARS; const int MAX_PROXIMITY_CHARS;
const bool IS_LATEST_DICT_VERSION; const bool IS_LATEST_DICT_VERSION;
const int ROOT_POS;
const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER;
const int FULL_WORD_MULTIPLIER; const int FULL_WORD_MULTIPLIER;
const int ROOT_POS;
int *mFrequencies; int *mFrequencies;
unsigned short *mOutputChars; unsigned short *mOutputChars;

View File

@ -38,49 +38,15 @@ public class SuggestHelper {
private final String TAG; private final String TAG;
/** Uses main dictionary only **/ /** Uses main dictionary only **/
// Stores the log tag, creates mSuggest for the main dictionary, then disables auto-text and
// selects CORRECTION_FULL_BIGRAM. The int[] signature and the stream-merging code below belong
// to the earlier revision; the int signature passes the resource id straight to Suggest.
public SuggestHelper(String tag, Context context, int[] resId) { public SuggestHelper(String tag, Context context, int resId) {
TAG = tag; TAG = tag;
InputStream[] is = null; mSuggest = new Suggest(context, resId);
try {
// merging separated dictionary into one if dictionary is separated
int total = 0;
is = new InputStream[resId.length];
for (int i = 0; i < resId.length; i++) {
is[i] = context.getResources().openRawResource(resId[i]);
// NOTE(review): InputStream.available() is documented as an estimate; this code treats it
// as the exact size of each resource - confirm that holds for raw resources.
total += is[i].available();
}
// One direct buffer in native byte order sized for all parts together.
ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
int got = 0;
for (int i = 0; i < resId.length; i++) {
got += Channels.newChannel(is[i]).read(byteBuffer);
}
// mSuggest is only assigned on this path when every expected byte was read.
if (got != total) {
Log.w(TAG, "Read " + got + " bytes, expected " + total);
} else {
mSuggest = new Suggest(context, byteBuffer);
Log.i(TAG, "Created mSuggest " + total + " bytes");
}
} catch (IOException e) {
// NOTE(review): the message mentions memory, but this branch handles any IOException
// raised inside the try block.
Log.w(TAG, "No available memory for binary dictionary");
} finally {
try {
if (is != null) {
// NOTE(review): if opening failed partway through, later entries of `is` are still null
// and close() on them would throw NullPointerException.
for (int i = 0; i < is.length; i++) {
is[i].close();
}
}
} catch (IOException e) {
Log.w(TAG, "Failed to close input stream");
}
}
// NOTE(review): on the stream-merging path mSuggest is left unassigned after a short read or
// an IOException, yet it is dereferenced here - verify against the field's initial value.
mSuggest.setAutoTextEnabled(false); mSuggest.setAutoTextEnabled(false);
mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM); mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
} }
/** Uses both main dictionary and user-bigram dictionary **/ /** Uses both main dictionary and user-bigram dictionary **/
public SuggestHelper(String tag, Context context, int[] resId, int userBigramMax, public SuggestHelper(String tag, Context context, int resId, int userBigramMax,
int userBigramDelete) { int userBigramDelete) {
this(tag, context, resId); this(tag, context, resId);
mUserBigram = new UserBigramDictionary(context, null, Locale.US.toString(), mUserBigram = new UserBigramDictionary(context, null, Locale.US.toString(),

View File

@ -36,9 +36,9 @@ public class SuggestPerformanceTests extends AndroidTestCase {
// For testing with real dictionary, TEMPORARILY COPY main dictionary into test directory. // For testing with real dictionary, TEMPORARILY COPY main dictionary into test directory.
// DO NOT SUBMIT real dictionary under test directory. // DO NOT SUBMIT real dictionary under test directory.
//int[] resId = new int[] { R.raw.main0, R.raw.main1, R.raw.main2 }; //int resId = R.raw.main;
int[] resId = new int[] { R.raw.test }; int resId = R.raw.test;
sh = new SuggestHelper(TAG, getTestContext(), resId); sh = new SuggestHelper(TAG, getTestContext(), resId);
loadString(); loadString();

View File

@ -26,7 +26,7 @@ public class SuggestTests extends AndroidTestCase {
// Creates the SuggestHelper `sh` from the R.raw.test dictionary resource; the earlier revision
// wrapped the id in a one-element array, the later one passes the id directly.
@Override @Override
protected void setUp() { protected void setUp() {
int[] resId = new int[] { R.raw.test }; int resId = R.raw.test;
sh = new SuggestHelper(TAG, getTestContext(), resId); sh = new SuggestHelper(TAG, getTestContext(), resId);
} }

View File

@ -31,7 +31,7 @@ public class UserBigramTests extends AndroidTestCase {
// Creates the SuggestHelper `sh` from the R.raw.test dictionary resource, passing MAX_DATA and
// DELETE_DATA as the userBigramMax and userBigramDelete arguments of the user-bigram constructor.
@Override @Override
protected void setUp() { protected void setUp() {
int[] resId = new int[] { R.raw.test }; int resId = R.raw.test;
sh = new SuggestHelper(TAG, getTestContext(), resId, MAX_DATA, DELETE_DATA); sh = new SuggestHelper(TAG, getTestContext(), resId, MAX_DATA, DELETE_DATA);
} }