/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;
import android.util.SparseArray;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;

/**
 * Implements a static, compacted, binary dictionary of standard words.
 *
 * <p>All dictionary operations are delegated to native code through a handle kept in
 * {@code mNativeDict}; a handle of {@code 0} means that no native dictionary is open.
 */
// TODO: All methods which should be locked need to have a suffix "Locked".
public final class BinaryDictionary extends Dictionary {
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
    private static final int MAX_RESULTS = 18;
    // Required space count for auto commit.
    // TODO: Remove this heuristic.
    private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;

    // Handle of the native dictionary; 0 when no dictionary is open.
    private long mNativeDict;
    private final Locale mLocale;
    private final long mDictSize;
    private final String mDictFilePath;

    // Buffers exchanged with the native suggestion call. They are reused across calls.
    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
    // Holds up to MAX_RESULTS words, each in its own slot of MAX_WORD_LENGTH code points.
    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
    private final int[] mSpaceIndices = new int[MAX_RESULTS];
    private final int[] mOutputScores = new int[MAX_RESULTS];
    private final int[] mOutputTypes = new int[MAX_RESULTS];
    private final int[] mOutputAutoCommitFirstWordConfidence = new int[MAX_RESULTS];

    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();

    // Traverse sessions keyed by session id. Guarded by synchronizing on the array itself.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
            CollectionUtils.newSparseArray();

    /**
     * Returns the traverse session registered for the given id, creating and registering it
     * first if none exists yet.
     *
     * @param traverseSessionId the id of the requested session.
     * @return the session associated with {@code traverseSessionId}.
     */
    // TODO: There should be a way to remove used DicTraverseSession objects from
    // {@code mDicTraverseSessions}.
    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
        synchronized (mDicTraverseSessions) {
            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
            if (traverseSession == null) {
                // The lock is held, so a single lookup is enough before creating the session.
                traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
                mDicTraverseSessions.put(traverseSessionId, traverseSession);
            }
            return traverseSession;
        }
    }

    /**
     * Constructor for the binary dictionary. This is supposed to be called from the
     * dictionary factory.
     * @param filename the name of the file to read through native code.
     * @param offset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale kept with this dictionary and handed to its traverse sessions.
     * @param dictType the dictionary type, as a human-readable string
     * @param isUpdatable whether to open the dictionary file in writable mode.
     */
    public BinaryDictionary(final String filename, final long offset, final long length,
            final boolean useFullEditDistance, final Locale locale, final String dictType,
            final boolean isUpdatable) {
        super(dictType);
        mLocale = locale;
        mDictSize = length;
        mDictFilePath = filename;
        mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
        loadDictionary(filename, offset, length, isUpdatable);
    }

    static {
        JniUtils.loadNativeLibrary();
    }

    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native void flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict);
    private static native void flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
    private static native int getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
            int[] suggestOptions, int[] prevWordCodePointArray,
            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence);
    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
    private static native int editDistanceNative(int[] before, int[] after);
    private static native void addUnigramWordNative(long dict, int[] word, int probability);
    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
            int probability);
    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
    private static native int calculateProbabilityNative(long dict, int unigramProbability,
            int bigramProbability);

    /**
     * Opens the native dictionary and stores the resulting handle in {@code mNativeDict}.
     */
    // TODO: Move native dict into session
    private final void loadDictionary(final String path, final long startOffset,
            final long length, final boolean isUpdatable) {
        mNativeDict = openNative(path, startOffset, length, isUpdatable);
    }

    /**
     * Computes suggestions using the traverse session with id 0.
     *
     * @see #getSuggestionsWithSessionId
     */
    @Override
    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
            final String prevWord, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
        return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
                additionalFeaturesOptions, 0 /* sessionId */);
    }

    /**
     * Asks the native dictionary for suggestions and converts the results into
     * {@link SuggestedWordInfo} objects.
     *
     * @param composer the word being composed; provides the code points and input pointers.
     * @param prevWord the previous word, or null if there is none.
     * @param proximityInfo the proximity info whose native handle is passed to native code.
     * @param blockOffensiveWords whether to drop results flagged as possibly offensive unless
     *        they are also flagged as an exact match.
     * @param additionalFeaturesOptions options forwarded to the native suggest options.
     * @param sessionId the id of the traverse session to use.
     * @return the list of suggestions, or null if the dictionary is not valid or if the
     *         composed word is too long to fit in the input buffer.
     */
    @Override
    public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
            final String prevWord, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
            final int sessionId) {
        if (!isValidDictionary()) return null;

        Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
        // TODO: toLowerCase in the native code
        final int[] prevWordCodePointArray = (null == prevWord)
                ? null : StringUtils.toCodePointArray(prevWord);
        final int composerSize = composer.size();

        final boolean isGesture = composer.isBatchMode();
        if (composerSize <= 1 || !isGesture) {
            // The composed code points are only copied for typed input, or for batch input
            // with at most one composed code point.
            if (composerSize > MAX_WORD_LENGTH - 1) return null;
            for (int i = 0; i < composerSize; i++) {
                mInputCodePoints[i] = composer.getCodeAt(i);
            }
        }

        final InputPointers ips = composer.getInputPointers();
        final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
        mNativeSuggestOptions.setIsGesture(isGesture);
        mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
        // proximityInfo and/or prevWordForBigrams may not be null.
        final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
                getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
                ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
                inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
                prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
                mOutputTypes, mOutputAutoCommitFirstWordConfidence);
        final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
        for (int j = 0; j < count; ++j) {
            // Each result occupies its own MAX_WORD_LENGTH-sized slot; a 0 code point ends
            // the word early.
            final int start = j * MAX_WORD_LENGTH;
            int len = 0;
            while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
                ++len;
            }
            if (len > 0) {
                final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
                if (blockOffensiveWords
                        && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
                        && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
                    // If we block potentially offensive words, and if the word is possibly
                    // offensive, then we don't output it unless it's also an exact match.
                    continue;
                }
                final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
                final int score = SuggestedWordInfo.KIND_WHITELIST == kind
                        ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
                // TODO: check that all users of the `kind' parameter are ready to accept
                // flags too and pass mOutputTypes[j] instead of kind
                // NOTE(review): the confidence is always read from index 0, not j; presumably
                // native code reports a single value for the whole call - confirm.
                suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
                        score, kind, this /* sourceDict */,
                        mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
                        mOutputAutoCommitFirstWordConfidence[0]));
            }
        }
        return suggestions;
    }

    /**
     * Returns whether a native dictionary is currently open, i.e. whether the native handle
     * is non-zero.
     */
    public boolean isValidDictionary() {
        return mNativeDict != 0;
    }

    /**
     * Converts both strings to code point arrays and delegates the normalized score
     * computation to native code.
     */
    public static float calcNormalizedScore(final String before, final String after,
            final int score) {
        return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
                StringUtils.toCodePointArray(after), score);
    }

    /**
     * Converts both strings to code point arrays and delegates the edit distance computation
     * to native code.
     *
     * @throws IllegalArgumentException if either argument is null.
     */
    public static int editDistance(final String before, final String after) {
        if (before == null || after == null) {
            throw new IllegalArgumentException();
        }
        return editDistanceNative(StringUtils.toCodePointArray(before),
                StringUtils.toCodePointArray(after));
    }

    /**
     * Returns whether the word has a probability other than {@code NOT_A_PROBABILITY}.
     */
    @Override
    public boolean isValidWord(final String word) {
        return getFrequency(word) != NOT_A_PROBABILITY;
    }

    /**
     * Returns the probability reported by native code for the word, or
     * {@code NOT_A_PROBABILITY} if the word is null.
     */
    @Override
    public int getFrequency(final String word) {
        if (word == null) return NOT_A_PROBABILITY;
        final int[] codePoints = StringUtils.toCodePointArray(word);
        return getProbabilityNative(mNativeDict, codePoints);
    }

    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
    // calls when checking for changes in an entire dictionary.
    /**
     * Returns whether the bigram has a probability other than {@code NOT_A_PROBABILITY}.
     */
    public boolean isValidBigram(final String word0, final String word1) {
        return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
    }

    /**
     * Returns the probability reported by native code for the bigram, or
     * {@code NOT_A_PROBABILITY} if either word is null or empty.
     */
    public int getBigramProbability(final String word0, final String word1) {
        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
    }

    /**
     * Flushes the dictionary with GC when a GC is reported as needed. Goes through
     * {@link #needsToRunGC()} so that, like the other flush-related entry points, native code
     * is not queried when no dictionary is open.
     */
    private void runGCIfRequired() {
        if (needsToRunGC()) {
            flushWithGC();
        }
    }

    // Add a unigram entry to binary dictionary in native code.
    public void addUnigramWord(final String word, final int probability) {
        if (TextUtils.isEmpty(word)) {
            return;
        }
        runGCIfRequired();
        final int[] codePoints = StringUtils.toCodePointArray(word);
        addUnigramWordNative(mNativeDict, codePoints, probability);
    }

    // Add a bigram entry to binary dictionary in native code.
    public void addBigramWords(final String word0, final String word1, final int probability) {
        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
            return;
        }
        runGCIfRequired();
        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
    }

    // Remove a bigram entry from binary dictionary in native code.
    public void removeBigramWords(final String word0, final String word1) {
        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
            return;
        }
        runGCIfRequired();
        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
    }

    /**
     * Closes the current native dictionary and traverse sessions, then opens the whole file at
     * {@code mDictFilePath} again in updatable mode.
     */
    private void reopen() {
        close();
        final File dictFile = new File(mDictFilePath);
        mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */,
                dictFile.length(), true /* isUpdatable */);
    }

    /**
     * Flushes the dictionary to its file through native code and reopens it. Does nothing if
     * the dictionary is not valid.
     */
    public void flush() {
        if (!isValidDictionary()) return;
        flushNative(mNativeDict, mDictFilePath);
        reopen();
    }

    /**
     * Flushes the dictionary to its file with GC through native code and reopens it. Does
     * nothing if the dictionary is not valid.
     */
    public void flushWithGC() {
        if (!isValidDictionary()) return;
        flushWithGCNative(mNativeDict, mDictFilePath);
        reopen();
    }

    /**
     * Returns whether native code reports that a GC is needed; always false if the dictionary
     * is not valid.
     */
    public boolean needsToRunGC() {
        if (!isValidDictionary()) return false;
        return needsToRunGCNative(mNativeDict);
    }

    /**
     * Delegates to native code to combine a unigram and a bigram probability. Returns
     * {@code NOT_A_PROBABILITY} if the dictionary is not valid.
     */
    @UsedForTesting
    public int calculateProbability(final int unigramProbability, final int bigramProbability) {
        if (!isValidDictionary()) return NOT_A_PROBABILITY;
        return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
    }

    /**
     * Returns true when the candidate word contains at least
     * {@code SPACE_COUNT_FOR_AUTO_COMMIT} space characters.
     */
    @Override
    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
        // TODO: actually use the confidence rather than use this completely broken heuristic
        final String word = candidate.mWord;
        final int length = word.length();
        int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT;
        for (int i = 0; i < length; ++i) {
            // This is okay because no low-surrogate and no high-surrogate can ever match the
            // space character, so we don't need to take care of iterating on code points.
            if (Constants.CODE_SPACE == word.charAt(i)) {
                if (0 >= --remainingSpaces) return true;
            }
        }
        return false;
    }

    /**
     * Closes and forgets every registered traverse session, then closes the native dictionary.
     */
    @Override
    public void close() {
        synchronized (mDicTraverseSessions) {
            final int sessionsSize = mDicTraverseSessions.size();
            for (int index = 0; index < sessionsSize; ++index) {
                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
                if (traverseSession != null) {
                    traverseSession.close();
                }
            }
            mDicTraverseSessions.clear();
        }
        closeInternalLocked();
    }

    /**
     * Closes the native dictionary if one is open and resets the handle to 0, so calling this
     * again is a no-op.
     */
    private synchronized void closeInternalLocked() {
        if (mNativeDict != 0) {
            closeNative(mNativeDict);
            mNativeDict = 0;
        }
    }

    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            super.finalize();
        }
    }
}