am 7eba0198
: Dump binary dictionary information when opening
* commit '7eba0198c0344ce7aac9867711d7944811dd3ad5': Dump binary dictionary information when opening
This commit is contained in:
commit
eb7cd63858
9 changed files with 180 additions and 40 deletions
|
@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
|
|||
AKLOGE("DICT: dictionary format is unknown, bad magic number");
|
||||
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
|
||||
} else {
|
||||
dictionary = new Dictionary(
|
||||
dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap);
|
||||
dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
|
||||
updatableMmap);
|
||||
}
|
||||
PROF_END(66);
|
||||
PROF_CLOSE;
|
||||
|
|
|
@ -35,6 +35,56 @@
|
|||
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
|
||||
#define MAX_PROXIMITY_CHARS_SIZE 16
|
||||
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
|
||||
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
// Encodes an array of code points as a 0-terminated UTF-8 style byte string.
//
// source:     code points to convert; conversion stops at the first 0 element.
// sourceSize: maximum number of elements read from source.
// dest:       output buffer; always 0-terminated when destSize > 0.
// destSize:   capacity of dest in chars, including the terminator.
//
// Returns the number of chars written to dest, not counting the terminator.
// Conversion stops early (without writing a partial sequence) as soon as the
// next code point would not fit. Negative values are not code points and are
// skipped. Values above 0x10FFFF are still emitted using the legacy 5- and
// 6-byte forms, as the original implementation did.
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
        char *dest, const int destSize) {
    // Without room for even the terminator there is nothing we can safely write.
    if (destSize <= 0) {
        return 0;
    }
    // We want to always terminate with a 0 char, so stop one short of the length to make
    // sure there is room.
    const int destLimit = destSize - 1;
    int si = 0;
    int di = 0;
    while (si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        // Pick the sequence length: the lead byte marker and the number of
        // continuation bytes (each carrying 6 payload bits) that follow it.
        int leadMarker;
        int continuationCount;
        if (codePoint <= 0x7F) { // One byte
            leadMarker = 0x00;
            continuationCount = 0;
        } else if (codePoint <= 0x7FF) { // Two bytes
            leadMarker = 0xC0;
            continuationCount = 1;
        } else if (codePoint <= 0xFFFF) { // Three bytes
            leadMarker = 0xE0;
            continuationCount = 2;
        } else if (codePoint <= 0x1FFFFF) { // Four bytes
            leadMarker = 0xF0;
            continuationCount = 3;
        } else if (codePoint <= 0x3FFFFFF) { // Five bytes
            leadMarker = 0xF8;
            continuationCount = 4;
        } else { // Six bytes
            leadMarker = 0xFC;
            continuationCount = 5;
        }
        // Never emit a truncated sequence: the whole thing must fit before the terminator.
        if (di + continuationCount >= destLimit) break;
        dest[di++] = (char)(leadMarker + (codePoint >> (6 * continuationCount)));
        for (int shift = 6 * (continuationCount - 1); shift >= 0; shift -= 6) {
            // Every continuation byte carries the 0x80 marker, including the last one.
            dest[di++] = (char)(0x80 + ((codePoint >> shift) & 0x3F));
        }
    }
    dest[di] = 0;
    return di;
}
|
||||
|
||||
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
|
||||
#include <android/log.h>
|
||||
|
@ -46,35 +96,13 @@
|
|||
|
||||
#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
|
||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
|
||||
#define INTS_TO_CHARS(input, length, output) do { \
|
||||
intArrayToCharArray(input, length, output); } while (0)
|
||||
|
||||
// TODO: Support full UTF-8 conversion
|
||||
// Legacy converter from code points to a 0-terminated UTF-8 byte string.
//
// source:     code points to convert; conversion stops at the first 0 element.
// sourceSize: maximum number of elements read from source.
// dest:       output buffer. Its size is not passed in; this function writes at
//             most MAX_WORD_LENGTH chars including the terminator, so the caller
//             must provide at least that much room.
//
// Returns the number of chars written, not counting the terminator. Only code
// points up to 0xFFFF (1 to 3 byte sequences) are supported; larger values and
// negative values are dropped.
AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
        char *dest) {
    const int destLimit = MAX_WORD_LENGTH - 1;
    int si = 0;
    int di = 0;
    while (si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        if (codePoint <= 0x7F) { // One byte
            dest[di++] = (char)codePoint;
        } else if (codePoint <= 0x7FF) { // Two bytes
            // Stop rather than write a sequence that would run past the limit.
            if (di + 1 >= destLimit) break;
            dest[di++] = (char)(0xC0 + (codePoint >> 6));
            dest[di++] = (char)(0x80 + (codePoint & 0x3F));
        } else if (codePoint <= 0xFFFF) { // Three bytes
            if (di + 2 >= destLimit) break;
            dest[di++] = (char)(0xE0 + (codePoint >> 12));
            dest[di++] = (char)(0x80 + ((codePoint >> 6) & 0x3F));
            dest[di++] = (char)(0x80 + (codePoint & 0x3F));
        }
        // Code points above 0xFFFF are not supported by this version and are dropped.
    }
    dest[di] = 0;
    return di;
}
|
||||
#define INTS_TO_CHARS(input, length, output, outlength) do { \
|
||||
intArrayToCharArray(input, length, output, outlength); } while (0)
|
||||
|
||||
static inline void dumpWordInfo(const int *word, const int length, const int rank,
|
||||
const int probability) {
|
||||
static char charBuf[50];
|
||||
const int N = intArrayToCharArray(word, length, charBuf);
|
||||
const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
|
||||
if (N > 1) {
|
||||
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
|
||||
}
|
||||
|
@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) {
|
|||
|
||||
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
|
||||
static char charBuf[50];
|
||||
const int N = intArrayToCharArray(word, length, charBuf);
|
||||
const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
|
||||
if (N > 1) {
|
||||
AKLOGI("[ %s ]", charBuf);
|
||||
}
|
||||
|
@ -304,8 +332,6 @@ static inline void prof_out(void) {
|
|||
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
|
||||
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
|
||||
|
||||
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
// DEBUG
|
||||
#define INPUTLENGTH_FOR_DEBUG (-1)
|
||||
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
|
||||
|
|
|
@ -28,15 +28,16 @@
|
|||
#if DEBUG_DICT
|
||||
#define LOGI_SHOW_ADD_COST_PROP \
|
||||
do { char charBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
|
||||
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
|
||||
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
|
||||
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
|
||||
#define DUMP_WORD_AND_SCORE(header) \
|
||||
do { char charBuf[50]; char prevWordCharBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
|
||||
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
|
||||
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
|
||||
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
|
||||
NELEMS(prevWordCharBuf)); \
|
||||
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
|
||||
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
|
||||
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
|
||||
|
|
|
@ -53,6 +53,20 @@ class BinaryDictionaryHeader {
|
|||
return mMultiWordCostMultiplier;
|
||||
}
|
||||
|
||||
// Looks up the header attribute named by key and stores its value in outValue.
//
// key:          name of the header attribute to read.
// outValue:     destination buffer of ints.
// outValueSize: capacity of outValue in elements.
//
// Nothing is written when outValueSize is not positive. With room for a single
// element only a 0 terminator is stored. Otherwise, if the lookup through
// BinaryDictionaryHeaderReadingUtils::readHeaderValue() reports failure, the
// buffer is set to the 0-terminated string "?".
AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
        int *outValue, int outValueSize) const {
    if (outValueSize < 1) {
        return;
    }
    if (outValueSize > 1) {
        const bool found = BinaryDictionaryHeaderReadingUtils::readHeaderValue(
                mBinaryDictionaryInfo, key, outValue, outValueSize);
        if (!found) {
            // Two elements are guaranteed here: the placeholder and its terminator.
            outValue[0] = '?';
            outValue[1] = '\0';
        }
        return;
    }
    // Exactly one element: only the terminator fits.
    outValue[0] = '\0';
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
|
||||
|
||||
|
|
|
@ -82,8 +82,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
|||
if(ByteArrayUtils::compareStringInBufferWithCharArray(
|
||||
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
|
||||
// The key was found.
|
||||
ByteArrayUtils::readStringAndAdvancePosition(
|
||||
const int length = ByteArrayUtils::readStringAndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
|
||||
// Add a 0 terminator to the string.
|
||||
outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
|
||||
return true;
|
||||
}
|
||||
ByteArrayUtils::advancePositionToBehindString(
|
||||
|
|
|
@ -20,23 +20,27 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
||||
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
|
||||
#include "utils/log_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BinaryDictionaryInfo {
|
||||
public:
|
||||
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
|
||||
const int dictBufOffset, const bool isUpdatable)
|
||||
BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
|
||||
const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
|
||||
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
|
||||
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
|
||||
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
||||
mDictBuf, mDictSize)),
|
||||
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
||||
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
|
||||
mDictionaryFormat)) {}
|
||||
mDictionaryFormat)) {
|
||||
logDictionaryInfo(env);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
|
||||
return mDictBuf;
|
||||
|
@ -88,6 +92,33 @@ class BinaryDictionaryInfo {
|
|||
const BinaryDictionaryHeader mDictionaryHeader;
|
||||
const uint8_t *const mDictRoot;
|
||||
const DictionaryStructurePolicy *const mStructurePolicy;
|
||||
|
||||
// Sends a one-line summary of this dictionary to the Java log via
// LogUtils::logToJava(): the "dictionary", "version" and "date" header
// attributes followed by the dictionary size (mDictSize).
//
// Each attribute is read as code points into a BUFFER_SIZE-element buffer and
// then converted to a char string of the same capacity with
// intArrayToCharArray() before being formatted.
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
    const int BUFFER_SIZE = 16;

    // Dictionary ID.
    int idCodePoints[BUFFER_SIZE];
    char idChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", idCodePoints, BUFFER_SIZE);
    intArrayToCharArray(idCodePoints, BUFFER_SIZE, idChars, BUFFER_SIZE);

    // Version string.
    int versionCodePoints[BUFFER_SIZE];
    char versionChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("version", versionCodePoints, BUFFER_SIZE);
    intArrayToCharArray(versionCodePoints, BUFFER_SIZE, versionChars, BUFFER_SIZE);

    // Date string.
    int dateCodePoints[BUFFER_SIZE];
    char dateChars[BUFFER_SIZE];
    mDictionaryHeader.readHeaderValueOrQuestionMark("date", dateCodePoints, BUFFER_SIZE);
    intArrayToCharArray(dateCodePoints, BUFFER_SIZE, dateChars, BUFFER_SIZE);

    LogUtils::logToJava(env,
            "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
            idChars, versionChars, dateChars, mDictSize);
}
|
||||
};
|
||||
}
|
||||
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "suggest/core/dictionary/bigram_dictionary.h"
|
||||
#include "suggest/core/dictionary/binary_format.h"
|
||||
#include "suggest/core/session/dic_traverse_session.h"
|
||||
|
@ -32,8 +33,9 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable)
|
||||
: mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
|
||||
Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
|
||||
int dictBufOffset, bool isUpdatable)
|
||||
: mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
|
||||
dictBufOffset, isUpdatable),
|
||||
mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
|
||||
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -52,7 +53,8 @@ class Dictionary {
|
|||
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
|
||||
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
|
||||
|
||||
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable);
|
||||
Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
|
||||
bool isUpdatable);
|
||||
|
||||
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
||||
|
|
62
native/jni/src/utils/log_utils.h
Normal file
62
native/jni/src/utils/log_utils.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_LOG_UTILS_H
|
||||
#define LATINIME_LOG_UTILS_H
|
||||
|
||||
#include <cstdio>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class LogUtils {
|
||||
public:
|
||||
static void logToJava(JNIEnv *const env, const char *const format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__ ((format (printf, 2, 3)))
|
||||
#endif // __GNUC__
|
||||
{
|
||||
static const char *TAG = "LatinIME:LogUtils";
|
||||
const jclass androidUtilLogClass = env->FindClass("android/util/Log");
|
||||
const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i",
|
||||
"(Ljava/lang/String;Ljava/lang/String;)I");
|
||||
const jstring javaTag = env->NewStringUTF(TAG);
|
||||
|
||||
va_list argList;
|
||||
va_start(argList, format);
|
||||
// Get the necessary size. Add 1 for the 0 terminator.
|
||||
const int size = vsnprintf(0, 0, format, argList) + 1;
|
||||
va_end(argList);
|
||||
char cString[size];
|
||||
va_start(argList, format);
|
||||
vsnprintf(cString, size, format, argList);
|
||||
va_end(argList);
|
||||
|
||||
jstring javaString = env->NewStringUTF(cString);
|
||||
env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString);
|
||||
env->DeleteLocalRef(javaString);
|
||||
env->DeleteLocalRef(javaTag);
|
||||
env->DeleteLocalRef(androidUtilLogClass);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(LogUtils);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_LOG_UTILS_H
|
Loading…
Reference in a new issue