From e9121a68a67b8723477668130a16d4c72d98f6fe Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi
Date: Wed, 14 May 2014 20:05:10 +0900
Subject: [PATCH] Filter out control characters and non-unicode code points.

Bug: 14119293
Bug: 13758761
Change-Id: I6b79cff0714152807f5e20b7a75060a0a772b28b
---
 native/jni/NativeFileList.mk                  |  1 +
 ...oid_inputmethod_latin_BinaryDictionary.cpp |  1 -
 native/jni/src/utils/jni_data_utils.cpp       | 24 +++++++++++++++++++
 native/jni/src/utils/jni_data_utils.h         | 22 ++++++++++++++---
 4 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 native/jni/src/utils/jni_data_utils.cpp

diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 6ccfec911..f418f6ece 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -95,6 +95,7 @@ LATIN_IME_CORE_SRC_FILES := \
     $(addprefix utils/, \
         autocorrection_threshold_utils.cpp \
         char_utils.cpp \
+        jni_data_utils.cpp \
         log_utils.cpp \
         time_keeper.cpp)
 
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 9098b714b..418c77d7c 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -302,7 +302,6 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
     int wordCodePoints[outCodePointsLength];
     memset(wordCodePoints, 0, sizeof(wordCodePoints));
     const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints);
-    env->SetIntArrayRegion(outCodePoints, 0, outCodePointsLength, wordCodePoints);
     JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
             MAX_WORD_LENGTH /* maxLength */, wordCodePoints, outCodePointsLength,
             false /* needsNullTermination */);
diff --git a/native/jni/src/utils/jni_data_utils.cpp b/native/jni/src/utils/jni_data_utils.cpp
new file mode 100644
index 000000000..5555293d5
--- /dev/null
+++ b/native/jni/src/utils/jni_data_utils.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/jni_data_utils.h"
+
+namespace latinime {
+
+const int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD;
+const int JniDataUtils::CODE_POINT_NULL = 0;
+
+} // namespace latinime
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 0e393e315..01a5685b4 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -23,6 +23,7 @@
 #include "jni.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "utils/char_utils.h"
 
 namespace latinime {
 
@@ -69,16 +70,31 @@ class JniDataUtils {
             const int maxLength, const int *const codePoints, const int codePointCount,
             const bool needsNullTermination) {
         const int outputCodePointCount = std::min(maxLength, codePointCount);
-        env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, codePoints);
+        int filteredCodePoints[outputCodePointCount];
+        for (int i = 0; i < outputCodePointCount; ++i) {
+            const int codePoint = codePoints[i];
+            if (!CharUtils::isInUnicodeSpace(codePoint)) {
+                filteredCodePoints[i] = CODE_POINT_REPLACEMENT_CHARACTER;
+            } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
+                // Control codes 0x01..0x1F; the null code point (0) is not in this range.
+                filteredCodePoints[i] = CODE_POINT_REPLACEMENT_CHARACTER;
+            } else {
+                filteredCodePoints[i] = codePoint;
+            }
+        }
+        env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
+                filteredCodePoints);
         if (needsNullTermination && outputCodePointCount < maxLength) {
-            const int terminal = 0;
             env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
-                    1 /* len */, &terminal);
+                    1 /* len */, &CODE_POINT_NULL);
         }
     }
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
+
+    static const int CODE_POINT_REPLACEMENT_CHARACTER;
+    static const int CODE_POINT_NULL;
 };
 } // namespace latinime
 #endif // LATINIME_JNI_DATA_UTILS_H