2014-04-14 09:49:19 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2014 The Android Open Source Project
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef LATINIME_JNI_DATA_UTILS_H
|
|
|
|
#define LATINIME_JNI_DATA_UTILS_H
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "defines.h"
|
|
|
|
#include "jni.h"
|
2014-12-15 09:09:45 +00:00
|
|
|
#include "suggest/core/dictionary/property/word_property.h"
|
2014-10-09 12:28:19 +00:00
|
|
|
#include "suggest/core/session/ngram_context.h"
|
2014-04-21 23:41:39 +00:00
|
|
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
2014-05-14 11:05:10 +00:00
|
|
|
#include "utils/char_utils.h"
|
2014-04-14 09:49:19 +00:00
|
|
|
|
|
|
|
namespace latinime {
|
|
|
|
|
|
|
|
class JniDataUtils {
|
|
|
|
public:
|
|
|
|
static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
|
|
|
|
if (!array) {
|
|
|
|
outVector->clear();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
const jsize arrayLength = env->GetArrayLength(array);
|
|
|
|
outVector->resize(arrayLength);
|
|
|
|
env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
|
|
|
|
}
|
|
|
|
|
2014-04-21 23:41:39 +00:00
|
|
|
static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
|
|
|
|
jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
|
|
|
|
DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
|
|
|
|
const int keyCount = env->GetArrayLength(attributeKeyStringArray);
|
|
|
|
for (int i = 0; i < keyCount; i++) {
|
|
|
|
jstring keyString = static_cast<jstring>(
|
|
|
|
env->GetObjectArrayElement(attributeKeyStringArray, i));
|
|
|
|
const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
|
|
|
|
char keyChars[keyUtf8Length + 1];
|
|
|
|
env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
|
2014-10-23 05:32:45 +00:00
|
|
|
env->DeleteLocalRef(keyString);
|
2014-04-21 23:41:39 +00:00
|
|
|
keyChars[keyUtf8Length] = '\0';
|
|
|
|
DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
|
|
|
|
HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
|
|
|
|
|
|
|
|
jstring valueString = static_cast<jstring>(
|
|
|
|
env->GetObjectArrayElement(attributeValueStringArray, i));
|
|
|
|
const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
|
|
|
|
char valueChars[valueUtf8Length + 1];
|
|
|
|
env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
|
2014-10-23 05:32:45 +00:00
|
|
|
env->DeleteLocalRef(valueString);
|
2014-04-21 23:41:39 +00:00
|
|
|
valueChars[valueUtf8Length] = '\0';
|
|
|
|
DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
|
|
|
|
HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
|
|
|
|
attributeMap[key] = value;
|
|
|
|
}
|
|
|
|
return attributeMap;
|
|
|
|
}
|
|
|
|
|
2014-05-14 08:42:47 +00:00
|
|
|
static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
|
|
|
|
const int maxLength, const int *const codePoints, const int codePointCount,
|
|
|
|
const bool needsNullTermination) {
|
2014-06-24 03:37:07 +00:00
|
|
|
const int codePointBufSize = std::min(maxLength, codePointCount);
|
|
|
|
int outputCodePonts[codePointBufSize];
|
|
|
|
int outputCodePointCount = 0;
|
|
|
|
for (int i = 0; i < codePointBufSize; ++i) {
|
2014-05-14 11:05:10 +00:00
|
|
|
const int codePoint = codePoints[i];
|
2014-06-24 03:37:07 +00:00
|
|
|
int codePointToOutput = codePoint;
|
2014-05-14 11:05:10 +00:00
|
|
|
if (!CharUtils::isInUnicodeSpace(codePoint)) {
|
2014-06-24 03:37:07 +00:00
|
|
|
if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
|
|
|
|
// Just skip Beginning-of-Sentence marker.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
|
2014-05-14 11:05:10 +00:00
|
|
|
} else if (codePoint >= 0x01 && codePoint <= 0x1F) {
|
|
|
|
// Control code.
|
2014-06-24 03:37:07 +00:00
|
|
|
codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
|
2014-05-14 11:05:10 +00:00
|
|
|
}
|
2014-06-24 03:37:07 +00:00
|
|
|
outputCodePonts[outputCodePointCount++] = codePointToOutput;
|
2014-05-14 11:05:10 +00:00
|
|
|
}
|
|
|
|
env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
|
|
|
|
outputCodePonts);
|
2014-05-14 08:42:47 +00:00
|
|
|
if (needsNullTermination && outputCodePointCount < maxLength) {
|
|
|
|
env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
|
2014-05-14 11:05:10 +00:00
|
|
|
1 /* len */, &CODE_POINT_NULL);
|
2014-05-14 08:42:47 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-08 03:05:13 +00:00
|
|
|
static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
|
2014-09-16 11:52:32 +00:00
|
|
|
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
|
2014-06-26 09:47:25 +00:00
|
|
|
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
|
|
|
|
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
|
|
|
bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
2014-09-16 11:52:32 +00:00
|
|
|
for (size_t i = 0; i < prevWordCount; ++i) {
|
2014-06-26 09:47:25 +00:00
|
|
|
prevWordCodePointCount[i] = 0;
|
|
|
|
isBeginningOfSentence[i] = false;
|
|
|
|
jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
|
|
|
|
if (!prevWord) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
jsize prevWordLength = env->GetArrayLength(prevWord);
|
|
|
|
if (prevWordLength > MAX_WORD_LENGTH) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
|
2014-10-23 05:32:45 +00:00
|
|
|
env->DeleteLocalRef(prevWord);
|
2014-06-26 09:47:25 +00:00
|
|
|
prevWordCodePointCount[i] = prevWordLength;
|
|
|
|
jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
|
|
|
|
env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
|
|
|
|
&isBeginningOfSentenceBoolean);
|
|
|
|
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
|
|
|
|
}
|
2014-10-08 03:05:13 +00:00
|
|
|
return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
|
2014-09-16 11:52:32 +00:00
|
|
|
prevWordCount);
|
2014-06-26 09:47:25 +00:00
|
|
|
}
|
|
|
|
|
2014-06-24 03:37:07 +00:00
|
|
|
static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
|
|
|
|
const jboolean value) {
|
|
|
|
env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
|
|
|
|
}
|
|
|
|
|
2014-05-14 10:47:03 +00:00
|
|
|
static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
|
|
|
|
env->SetIntArrayRegion(array, index, 1 /* len */, &value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
|
|
|
|
const float value) {
|
|
|
|
env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
|
|
|
|
}
|
|
|
|
|
2014-12-15 09:09:45 +00:00
|
|
|
static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
|
|
|
|
jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
|
|
|
|
jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
|
|
|
|
jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
|
|
|
|
jobject outShortcutProbabilities);
|
|
|
|
|
2014-04-14 09:49:19 +00:00
|
|
|
private:
|
|
|
|
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
|
2014-05-14 11:05:10 +00:00
|
|
|
|
|
|
|
static const int CODE_POINT_REPLACEMENT_CHARACTER;
|
|
|
|
static const int CODE_POINT_NULL;
|
2014-04-14 09:49:19 +00:00
|
|
|
};
|
|
|
|
} // namespace latinime
|
|
|
|
#endif // LATINIME_JNI_DATA_UTILS_H
|