From 7eba0198c0344ce7aac9867711d7944811dd3ad5 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Thu, 27 Jun 2013 17:31:13 +0900 Subject: [PATCH] Dump binary dictionary information when opening Bug: 9459517 Change-Id: I122583e734936ae0284e1c7500c6c9242bc7973b --- ...oid_inputmethod_latin_BinaryDictionary.cpp | 4 +- native/jni/src/defines.h | 82 ++++++++++++------- .../jni/src/suggest/core/dicnode/dic_node.h | 7 +- .../dictionary/binary_dictionary_header.h | 14 ++++ ...binary_dictionary_header_reading_utils.cpp | 4 +- .../core/dictionary/binary_dictionary_info.h | 37 ++++++++- .../suggest/core/dictionary/dictionary.cpp | 6 +- .../src/suggest/core/dictionary/dictionary.h | 4 +- native/jni/src/utils/log_utils.h | 62 ++++++++++++++ 9 files changed, 180 insertions(+), 40 deletions(-) create mode 100644 native/jni/src/utils/log_utils.h diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index a93bbeb8c..2b8dbbcaf 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s AKLOGE("DICT: dictionary format is unknown, bad magic number"); releaseDictBuf(static_cast(dictBuf) - offset, adjDictSize, fd); } else { - dictionary = new Dictionary( - dictBuf, static_cast(dictSize), fd, offset, updatableMmap); + dictionary = new Dictionary(env, dictBuf, static_cast(dictSize), fd, offset, + updatableMmap); } PROF_END(66); PROF_CLOSE; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index cb6681456..607a74400 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -35,6 +35,56 @@ // Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java #define MAX_PROXIMITY_CHARS_SIZE 16 #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + 
// Converts an array of code points into a 0-terminated UTF-8 string.
//
// Reading stops at the first 0 code point, after sourceSize code points, or as soon as the
// next encoded character would not fit in dest while still leaving room for the terminator.
// A multi-byte sequence is never written partially.
//
// Values that cannot be expressed in UTF-8 (negative, or above 0x10FFFF) are skipped.
// Surrogate values (0xD800..0xDFFF) are not filtered and are encoded as three bytes.
//
// source:     code points to convert. When null, nothing is converted.
// sourceSize: maximum number of code points to read from source.
// dest:       output buffer. When null, or when destSize is not positive, nothing is written.
// destSize:   capacity of dest in bytes, terminator included.
// Returns the number of bytes written to dest, not counting the terminator.
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
        char *dest, const int destSize) {
    // Without at least one byte of room even the terminator cannot be stored.
    if (!dest || destSize <= 0) {
        return 0;
    }
    // We want to always terminate with a 0 char, so stop one short of the length to make
    // sure there is room.
    const int destLimit = destSize - 1;
    int si = 0;
    int di = 0;
    while (source && si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0 || codePoint > 0x10FFFF) {
            // Not a code point... skip.
            continue;
        }
        // The bounds are inclusive: 0x7F, 0x7FF and 0xFFFF are the largest values that fit
        // in one, two and three bytes respectively. Using the next size up would produce
        // an over-long encoding.
        int byteCount;
        int leadBits;
        if (codePoint <= 0x7F) {
            byteCount = 1;
            leadBits = 0x00;
        } else if (codePoint <= 0x7FF) {
            byteCount = 2;
            leadBits = 0xC0;
        } else if (codePoint <= 0xFFFF) {
            byteCount = 3;
            leadBits = 0xE0;
        } else {
            byteCount = 4;
            leadBits = 0xF0;
        }
        // Never emit a truncated sequence: either the whole character fits or we stop.
        if (di + byteCount > destLimit) {
            break;
        }
        int shift = 6 * (byteCount - 1);
        dest[di++] = static_cast<char>(leadBits | (codePoint >> shift));
        // Every trailing byte carries six payload bits behind the 10xxxxxx marker.
        while (shift > 0) {
            shift -= 6;
            dest[di++] = static_cast<char>(0x80 | ((codePoint >> shift) & 0x3F));
        }
    }
    dest[di] = 0;
    return di;
}
&min(const T &a, const T &b) { return a < b ? a : b; } template AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } -#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) - // DEBUG #define INPUTLENGTH_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 52db8e9c7..017df34fd 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -28,15 +28,16 @@ #if DEBUG_DICT #define LOGI_SHOW_ADD_COST_PROP \ do { char charBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ + INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0) #define DUMP_WORD_AND_SCORE(header) \ do { char charBuf[50]; char prevWordCharBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ + INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \ - mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \ + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ + NELEMS(prevWordCharBuf)); \ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h index 6dba0b266..240512bce 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ 
b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h @@ -53,6 +53,20 @@ class BinaryDictionaryHeader { return mMultiWordCostMultiplier; } + AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key, + int *outValue, int outValueSize) const { + if (outValueSize <= 0) return; + if (outValueSize == 1) { + outValue[0] = '\0'; + return; + } + if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo, + key, outValue, outValueSize)) { + outValue[0] = '?'; + outValue[1] = '\0'; + } + } + private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp index 6e1b15ce7..c4c4bedde 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -82,8 +82,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags if(ByteArrayUtils::compareStringInBufferWithCharArray( binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { // The key was found. - ByteArrayUtils::readStringAndAdvancePosition( + const int length = ByteArrayUtils::readStringAndAdvancePosition( binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); + // Add a 0 terminator to the string. + outValue[length < outValueSize ? 
length : outValueSize - 1] = '\0'; return true; } ByteArrayUtils::advancePositionToBehindString( diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index 7cb31440a..0fd4f6961 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -20,23 +20,27 @@ #include #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" +#include "utils/log_utils.h" namespace latinime { class BinaryDictionaryInfo { public: - BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, - const int dictBufOffset, const bool isUpdatable) + BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf, + const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable) : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictBuf, mDictSize)), mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( - mDictionaryFormat)) {} + mDictionaryFormat)) { + logDictionaryInfo(env); + } AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -88,6 +92,33 @@ class BinaryDictionaryInfo { const BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; const DictionaryStructurePolicy *const mStructurePolicy; + + AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { + const int BUFFER_SIZE = 16; + int dictionaryIdCodePointBuffer[BUFFER_SIZE]; + int versionStringCodePointBuffer[BUFFER_SIZE]; + int 
dateStringCodePointBuffer[BUFFER_SIZE]; + mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", + dictionaryIdCodePointBuffer, BUFFER_SIZE); + mDictionaryHeader.readHeaderValueOrQuestionMark("version", + versionStringCodePointBuffer, BUFFER_SIZE); + mDictionaryHeader.readHeaderValueOrQuestionMark("date", + dateStringCodePointBuffer, BUFFER_SIZE); + + char dictionaryIdCharBuffer[BUFFER_SIZE]; + char versionStringCharBuffer[BUFFER_SIZE]; + char dateStringCharBuffer[BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, + dictionaryIdCharBuffer, BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, + versionStringCharBuffer, BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, + dateStringCharBuffer, BUFFER_SIZE); + + LogUtils::logToJava(env, + "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", + dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize); + } }; } #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 675b54972..f520a75b1 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -22,6 +22,7 @@ #include #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/session/dic_traverse_session.h" @@ -32,8 +33,9 @@ namespace latinime { -Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) - : mBinaryDictionaryInfo(static_cast(dict), dictSize, mmapFd, +Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, + int dictBufOffset, bool isUpdatable) + : mBinaryDictionaryInfo(env, static_cast(dict), dictSize, mmapFd, dictBufOffset, isUpdatable), mBigramDictionary(new 
BigramDictionary(&mBinaryDictionaryInfo)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 94579c200..1bf24a85b 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -20,6 +20,7 @@ #include #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/binary_dictionary_info.h" namespace latinime { @@ -52,7 +53,8 @@ class Dictionary { static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; - Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable); + Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset, + bool isUpdatable); int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, diff --git a/native/jni/src/utils/log_utils.h b/native/jni/src/utils/log_utils.h new file mode 100644 index 000000000..ccbecfc1d --- /dev/null +++ b/native/jni/src/utils/log_utils.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_LOG_UTILS_H +#define LATINIME_LOG_UTILS_H + +#include +#include + +#include "defines.h" +#include "jni.h" + +namespace latinime { + +class LogUtils { + public: + static void logToJava(JNIEnv *const env, const char *const format, ...) +#ifdef __GNUC__ + __attribute__ ((format (printf, 2, 3))) +#endif // __GNUC__ + { + static const char *TAG = "LatinIME:LogUtils"; + const jclass androidUtilLogClass = env->FindClass("android/util/Log"); + const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i", + "(Ljava/lang/String;Ljava/lang/String;)I"); + const jstring javaTag = env->NewStringUTF(TAG); + + va_list argList; + va_start(argList, format); + // Get the necessary size. Add 1 for the 0 terminator. + const int size = vsnprintf(0, 0, format, argList) + 1; + va_end(argList); + char cString[size]; + va_start(argList, format); + vsnprintf(cString, size, format, argList); + va_end(argList); + + jstring javaString = env->NewStringUTF(cString); + env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString); + env->DeleteLocalRef(javaString); + env->DeleteLocalRef(javaTag); + env->DeleteLocalRef(androidUtilLogClass); + } + + private: + DISALLOW_COPY_AND_ASSIGN(LogUtils); +}; +} // namespace latinime +#endif // LATINIME_LOG_UTILS_H