LatinIME/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp

345 lines
14 KiB
C++

/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cerrno>
#include <cstring> // for memset()
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "utils/autocorrection_threshold_utils.h"
namespace latinime {
class ProximityInfo;
// Helper method
static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
int ret = munmap(const_cast<void *>(dictBuf), length);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
ret = close(fd);
if (ret != 0) {
AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
}
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
// TODO: Move dictionary buffer handling to policyimpl.
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
return 0;
}
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
int fd = 0;
void *dictBuf = 0;
int offset = 0;
const bool updatableMmap = (isUpdatable == JNI_TRUE);
const int openMode = updatableMmap ? O_RDWR : O_RDONLY;
fd = open(sourceDirChars, openMode);
if (fd < 0) {
AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
return 0;
}
int pagesize = getpagesize();
offset = static_cast<int>(dictOffset) % pagesize;
int adjDictOffset = static_cast<int>(dictOffset) - offset;
int adjDictSize = static_cast<int>(dictSize) + offset;
const int protMode = updatableMmap ? PROT_READ | PROT_WRITE : PROT_READ;
dictBuf = mmap(0, adjDictSize, protMode, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
return 0;
}
dictBuf = static_cast<char *>(dictBuf) + offset;
if (!dictBuf) {
AKLOGE("DICT: dictBuf is null");
return 0;
}
Dictionary *dictionary = 0;
if (FormatUtils::UNKNOWN_VERSION
== FormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
static_cast<int>(dictSize))) {
AKLOGE("DICT: dictionary format is unknown, bad magic number");
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
} else {
dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
updatableMmap);
}
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
}
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const BinaryDictionaryInfo *const binaryDictionaryInfo = dictionary->getBinaryDictionaryInfo();
const int dictBufOffset = binaryDictionaryInfo->getDictBufOffset();
const void *dictBuf = binaryDictionaryInfo->getDictBuf();
if (!dictBuf) return;
releaseDictBuf(static_cast<const char *>(dictBuf) - dictBufOffset,
binaryDictionaryInfo->getDictSize() + dictBufOffset,
binaryDictionaryInfo->getMmapFd());
delete dictionary;
}
static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions,
jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray,
jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
DicTraverseSession *traverseSession =
reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
// Input values
int xCoordinates[inputSize];
int yCoordinates[inputSize];
int times[inputSize];
int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
const jsize prevWordCodePointsLength =
prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
int prevWordCodePointsInternal[prevWordCodePointsLength];
int *prevWordCodePoints = 0;
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
if (prevWordCodePointsForBigrams) {
env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
prevWordCodePointsInternal);
prevWordCodePoints = prevWordCodePointsInternal;
}
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
int options[numberOfOptions];
env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
SuggestOptions givenSuggestOptions(options, numberOfOptions);
// Output values
/* By the way, let's check the output array length here to make sure */
const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
ASSERT(false);
return 0;
}
const jsize scoresLength = env->GetArrayLength(scoresArray);
if (scoresLength != MAX_RESULTS) {
AKLOGE("Invalid scoresLength: %d", scoresLength);
ASSERT(false);
return 0;
}
int outputCodePoints[outputCodePointsLength];
int scores[scoresLength];
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
int spaceIndices[spaceIndicesLength];
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
int outputTypes[outputTypesLength];
memset(outputCodePoints, 0, sizeof(outputCodePoints));
memset(scores, 0, sizeof(scores));
memset(spaceIndices, 0, sizeof(spaceIndices));
memset(outputTypes, 0, sizeof(outputTypes));
int count;
if (givenSuggestOptions.isGesture() || inputSize > 0) {
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints,
prevWordCodePointsLength, commitPoint, &givenSuggestOptions, outputCodePoints,
scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
outputCodePoints, scores, outputTypes);
}
// Copy back the output values
env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
return count;
}
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
return dictionary->getProbability(codePoints, wordLength);
}
static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
const jsize word1Length = env->GetArrayLength(word1);
int word0CodePoints[word0Length];
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after);
int beforeCodePoints[beforeLength];
int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength,
afterCodePoints, afterLength, score);
}
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, jintArray before,
jintArray after) {
jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after);
int beforeCodePoints[beforeLength];
int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength,
afterCodePoints, afterLength);
}
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
}
jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
dictionary->addUnigramWord(codePoints, wordLength, probability);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1, jint probability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
}
jsize word0Length = env->GetArrayLength(word0);
int word0CodePoints[word0Length];
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
word1Length, probability);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
}
jsize word0Length = env->GetArrayLength(word0);
int word0CodePoints[word0Length];
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints,
word1Length);
}
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
const_cast<char *>("(Ljava/lang/String;JJZ)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_open)
},
{
const_cast<char *>("closeNative"),
const_cast<char *>("(J)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
const_cast<char *>("getProbabilityNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
const_cast<char *>("isValidBigramNative"),
const_cast<char *>("(J[I[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
},
{
const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
},
{
const_cast<char *>("editDistanceNative"),
const_cast<char *>("([I[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)
},
{
const_cast<char *>("addUnigramWordNative"),
const_cast<char *>("(J[II)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
const_cast<char *>("(J[I[II)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[I[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
}
};
int register_BinaryDictionary(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime