// File: LatinIME/native/jni/src/utils/jni_data_utils.h
// (Repository viewer metadata: 158 lines, 7.1 KiB, C++)

/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_JNI_DATA_UTILS_H
#define LATINIME_JNI_DATA_UTILS_H
#include <algorithm>
#include <vector>
#include "defines.h"
#include "dictionary/header/header_read_write_utils.h"
#include "dictionary/interface/dictionary_header_structure_policy.h"
#include "dictionary/property/ngram_context.h"
#include "dictionary/property/word_property.h"
#include "jni.h"
#include "utils/char_utils.h"
namespace latinime {
class JniDataUtils {
public:
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
if (!array) {
outVector->clear();
return;
}
const jsize arrayLength = env->GetArrayLength(array);
outVector->resize(arrayLength);
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
}
static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
const int keyCount = env->GetArrayLength(attributeKeyStringArray);
for (int i = 0; i < keyCount; i++) {
jstring keyString = static_cast<jstring>(
env->GetObjectArrayElement(attributeKeyStringArray, i));
const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
char keyChars[keyUtf8Length + 1];
env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
env->DeleteLocalRef(keyString);
keyChars[keyUtf8Length] = '\0';
DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
jstring valueString = static_cast<jstring>(
env->GetObjectArrayElement(attributeValueStringArray, i));
const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
char valueChars[valueUtf8Length + 1];
env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
env->DeleteLocalRef(valueString);
valueChars[valueUtf8Length] = '\0';
DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
attributeMap[key] = value;
}
return attributeMap;
}
static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
const int maxLength, const int *const codePoints, const int codePointCount,
const bool needsNullTermination) {
const int codePointBufSize = std::min(maxLength, codePointCount);
int outputCodePonts[codePointBufSize];
int outputCodePointCount = 0;
for (int i = 0; i < codePointBufSize; ++i) {
const int codePoint = codePoints[i];
int codePointToOutput = codePoint;
if (!CharUtils::isInUnicodeSpace(codePoint)) {
if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
// Just skip Beginning-of-Sentence marker.
continue;
}
codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
} else if (codePoint >= 0x01 && codePoint <= 0x1F) {
// Control code.
codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
}
outputCodePonts[outputCodePointCount++] = codePointToOutput;
}
env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
outputCodePonts);
if (needsNullTermination && outputCodePointCount < maxLength) {
env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
1 /* len */, &CODE_POINT_NULL);
}
}
static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
for (size_t i = 0; i < prevWordCount; ++i) {
prevWordCodePointCount[i] = 0;
isBeginningOfSentence[i] = false;
jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
if (!prevWord) {
continue;
}
jsize prevWordLength = env->GetArrayLength(prevWord);
if (prevWordLength > MAX_WORD_LENGTH) {
continue;
}
env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
env->DeleteLocalRef(prevWord);
prevWordCodePointCount[i] = prevWordLength;
jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
&isBeginningOfSentenceBoolean);
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
}
return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
prevWordCount);
}
static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
const jboolean value) {
env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
}
static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
env->SetIntArrayRegion(array, index, 1 /* len */, &value);
}
static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
const float value) {
env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
}
static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
jobject outShortcutProbabilities);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
static const int CODE_POINT_REPLACEMENT_CHARACTER;
static const int CODE_POINT_NULL;
};
} // namespace latinime
#endif // LATINIME_JNI_DATA_UTILS_H